{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":155727723,"defaultBranch":"master","name":"lc0","ownerLogin":"almaudoh","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2018-11-01T14:28:46.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/4356813?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1723406055.0","currentOid":""},"activityList":{"items":[{"before":"237766a90330fc33bafd566f80784d56b4bd5810","after":"a09300cbb2c2c92aca1bec1de30d974a7f35a33e","ref":"refs/heads/rpe-cuda","pushedAt":"2024-09-08T17:56:00.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Merge branch 'master' into rpe-cuda","shortMessageHtmlLink":"Merge branch 'master' into rpe-cuda"}},{"before":"3759d65fb39786815b951a84b04ebae576befd61","after":"b14160cf806b40fd403a0be177657d9ba7cbd7b5","ref":"refs/heads/master","pushedAt":"2024-09-08T17:53:04.000Z","pushType":"push","commitsCount":7,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"fix opset18 handling for reduce ops in xla hlo builing (#2066)","shortMessageHtmlLink":"fix opset18 handling for reduce ops in xla hlo builing (LeelaChessZer…"}},{"before":"75a099fe36b74f908c6bb8b9386fe9327428c028","after":"237766a90330fc33bafd566f80784d56b4bd5810","ref":"refs/heads/rpe-cuda","pushedAt":"2024-09-08T17:37:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fuse RPE-Q and RPE-K kernels. 10% speedup.","shortMessageHtmlLink":"Fuse RPE-Q and RPE-K kernels. 10% speedup."}},{"before":"70d267287aa33dfa9c282b1a14b3dda3343aa870","after":"75a099fe36b74f908c6bb8b9386fe9327428c028","ref":"refs/heads/rpe-cuda","pushedAt":"2024-09-02T12:14:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Split kernel dot product workload among multiple threads in warp","shortMessageHtmlLink":"Split kernel dot product workload among multiple threads in warp"}},{"before":null,"after":"efa8a0112e5217ad9dfc03bcbfa8d6e9e3998afa","ref":"refs/heads/rpe-cuda-batched-gemm","pushedAt":"2024-08-11T19:54:15.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Using batched gemm for RPE matmul. Not very efficient.","shortMessageHtmlLink":"Using batched gemm for RPE matmul. Not very efficient."}},{"before":"506a4c4647792e954b4ccec8c6ebd411ab9c4cff","after":"70d267287aa33dfa9c282b1a14b3dda3343aa870","ref":"refs/heads/rpe-cuda","pushedAt":"2024-07-21T18:15:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix bugs in fp32 and non-rpe code paths.","shortMessageHtmlLink":"Fix bugs in fp32 and non-rpe code paths."}},{"before":"2ee1637780a55891c4c268ce443f3730f8851b3d","after":"506a4c4647792e954b4ccec8c6ebd411ab9c4cff","ref":"refs/heads/rpe-cuda","pushedAt":"2024-07-21T03:26:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Improved speed by optimizing rpe kernel. Still not enough yet.","shortMessageHtmlLink":"Improved speed by optimizing rpe kernel. Still not enough yet."}},{"before":"9614d656c5a051e04a5a8fa701de6ac1ca02cfe5","after":"2ee1637780a55891c4c268ce443f3730f8851b3d","ref":"refs/heads/rpe-cuda","pushedAt":"2024-07-15T00:28:51.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Minor optimization of kernel. Not much perf diff.","shortMessageHtmlLink":"Minor optimization of kernel. Not much perf diff."}},{"before":"32881af0778ceef7bed14774cbb65405617428c7","after":"9614d656c5a051e04a5a8fa701de6ac1ca02cfe5","ref":"refs/heads/rpe-cuda","pushedAt":"2024-07-10T02:11:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix attention scaling factor for non-rpe nets.","shortMessageHtmlLink":"Fix attention scaling factor for non-rpe nets."}},{"before":null,"after":"32881af0778ceef7bed14774cbb65405617428c7","ref":"refs/heads/rpe-cuda","pushedAt":"2024-07-10T01:40:51.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Add relative positional encoding, RPE in cuda backend.","shortMessageHtmlLink":"Add relative positional encoding, RPE in cuda backend."}},{"before":null,"after":"2372467f55058c2710d127022d049469c1189f80","ref":"refs/heads/rpe-metal","pushedAt":"2024-07-09T10:18:59.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix bugs in weights loading and einsum implementation.","shortMessageHtmlLink":"Fix bugs in weights loading and einsum implementation."}},{"before":"f80dd7ecbc327e36722d0fd20db23bfb1d2f3710","after":"3759d65fb39786815b951a84b04ebae576befd61","ref":"refs/heads/master","pushedAt":"2024-06-23T06:33:42.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"fix build outside x86_64/aarch64 (#2010)\n\nthe current logic includes the x86 simd header any time the platform\r\nis !arm, which is backwards","shortMessageHtmlLink":"fix build outside x86_64/aarch64 (LeelaChessZero#2010)"}},{"before":"b5cdc3e9795399ee9a58dce2ee208cde3ea7025a","after":"955ba909d1c5e1ee31043789ed41aa063a994c67","ref":"refs/heads/multihead-arch-cuda-cutlass-fmha","pushedAt":"2024-06-15T15:45:49.000Z","pushType":"push","commitsCount":21,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Merge branch 'master' into multihead-arch-cuda-cutlass-fmha","shortMessageHtmlLink":"Merge branch 'master' into multihead-arch-cuda-cutlass-fmha"}},{"before":null,"after":"7fceeebbb91091a98af331a878679689735daf5f","ref":"refs/heads/multihead-arch-cutlass-int8-qkv-average","pushedAt":"2024-06-02T07:11:52.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Implement fused QKV with averaged scaling factors.","shortMessageHtmlLink":"Implement fused QKV with averaged scaling factors."}},{"before":"7fce117e4ef8f1c7f4e94838cd11db25769e3fb9","after":"f80dd7ecbc327e36722d0fd20db23bfb1d2f3710","ref":"refs/heads/master","pushedAt":"2024-06-02T02:23:52.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Improve default WDLDrawRateTarget handling (#2029)","shortMessageHtmlLink":"Improve default WDLDrawRateTarget handling (LeelaChessZero#2029)"}},{"before":"870ebba19979013d005416ed9a2e5ed243667ecd","after":"80cc6737112447b65756d966e84637fbdcf73efb","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-06-02T02:22:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Split QKV to allow use of int8->int8 cutlass matmul.","shortMessageHtmlLink":"Split QKV to allow use of int8->int8 cutlass matmul."}},{"before":"adad5452f91cc7ea649a3f535d61595eaaf21802","after":"870ebba19979013d005416ed9a2e5ed243667ecd","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-16T21:52:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Remove epsilon from quantize","shortMessageHtmlLink":"Remove epsilon from quantize"}},{"before":"dddc978162dff8de6620b4c2dc902c5d83ba3922","after":"adad5452f91cc7ea649a3f535d61595eaaf21802","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-16T10:51:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Implement int8 in all gemms except QKV. Fuse dequant-bias + add+quantize next layer","shortMessageHtmlLink":"Implement int8 in all gemms except QKV. Fuse dequant-bias + add+quant…"}},{"before":"4e3a650d0792b4e88f5b88e80c137b890736dd68","after":"dddc978162dff8de6620b4c2dc902c5d83ba3922","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T12:20:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fuse FFN2 quantize to FFN2 dequantize+bias-add. 2% speedup.","shortMessageHtmlLink":"Fuse FFN2 quantize to FFN2 dequantize+bias-add. 2% speedup."}},{"before":"e82761a5e13ce25607588a935fa48850e137d467","after":"4e3a650d0792b4e88f5b88e80c137b890736dd68","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T08:13:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix scratch size and change epiloge compute to int32.","shortMessageHtmlLink":"Fix scratch size and change epiloge compute to int32."}},{"before":"28912e6ec54da589c059c6792a55e43ac609b871","after":"e82761a5e13ce25607588a935fa48850e137d467","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T02:15:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix promotion to double for clipMatrix.","shortMessageHtmlLink":"Fix promotion to double for clipMatrix."}},{"before":"3201c0bf859280be51a28830684ab52ec2c5e3e5","after":"28912e6ec54da589c059c6792a55e43ac609b871","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T01:30:20.000Z","pushType":"push","commitsCount":16,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Merge remote-tracking branch 'upstream/master' into multihead-arch-cutlass-int8","shortMessageHtmlLink":"Merge remote-tracking branch 'upstream/master' into multihead-arch-cu…"}},{"before":"868b9ac8d0274925605e601d289b55cf037aa6c7","after":"3201c0bf859280be51a28830684ab52ec2c5e3e5","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T01:19:16.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Merge branch 'multihead-arch-cutlass-int8' of https://github.com/almaudoh/lc0 into multihead-arch-cutlass-int8","shortMessageHtmlLink":"Merge branch 'multihead-arch-cutlass-int8' of https://github.com/alma…"}},{"before":"09fcd568d797b2249be8e2643ebcab17381c9dc7","after":"868b9ac8d0274925605e601d289b55cf037aa6c7","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-13T01:17:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Fix bugs in int8 implementation - ith extra (ssuper) pair of eyes from @tilps","shortMessageHtmlLink":"Fix bugs in int8 implementation - ith extra (ssuper) pair of eyes from "}},{"before":"01c24e5cf41789500127274910c2b08218a3417e","after":"09fcd568d797b2249be8e2643ebcab17381c9dc7","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-12T02:46:26.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Change gemms to int32 - wip","shortMessageHtmlLink":"Change gemms to int32 - wip"}},{"before":"0f61c2f0f5400b3fc56ac8b02b03e8835228af10","after":"01c24e5cf41789500127274910c2b08218a3417e","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-08T03:33:28.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Update gemms to provide int8->fp32 for correct results. Remove old and unused code.","shortMessageHtmlLink":"Update gemms to provide int8->fp32 for correct results. Remove old an…"}},{"before":"451fbf34b6738805ec611073975d3992cb34d9cc","after":"0f61c2f0f5400b3fc56ac8b02b03e8835228af10","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-05-03T05:53:24.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"Remove debug outputs.","shortMessageHtmlLink":"Remove debug outputs."}},{"before":"8d5c9cdf45649c9729a7cd2e1e043632ed409bfa","after":"7fce117e4ef8f1c7f4e94838cd11db25769e3fb9","ref":"refs/heads/master","pushedAt":"2024-05-02T14:38:34.000Z","pushType":"push","commitsCount":15,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"},"commit":{"message":"fix openblas appveyor build (#2022)","shortMessageHtmlLink":"fix openblas appveyor build (LeelaChessZero#2022)"}},{"before":"0a700939a216c3bd049b4f33847328f710211e6a","after":"451fbf34b6738805ec611073975d3992cb34d9cc","ref":"refs/heads/multihead-arch-cutlass-int8","pushedAt":"2024-04-22T12:33:36.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"almaudoh-1","name":null,"path":"/almaudoh-1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/167550661?s=80&v=4"},"commit":{"message":"WIP. Reworked int8 to use scaling factors stored in weights. Added kernels for clipping of inputs for non-int8 inference.","shortMessageHtmlLink":"WIP. Reworked int8 to use scaling factors stored in weights. Added ke…"}},{"before":"73155a157bf2e9ade839d1bd87f8ceb159c89f57","after":null,"ref":"refs/heads/multihead-arch-blas","pushedAt":"2024-03-23T15:04:57.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"almaudoh","name":null,"path":"/almaudoh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/4356813?s=80&v=4"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOS0wOFQxNzo1NjowMC4wMDAwMDBazwAAAASwN8gP","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wMy0yM1QxNTowNDo1Ny4wMDAwMDBazwAAAAQdfTsT"}},"title":"Activity · almaudoh/lc0"}