{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":791781355,"defaultBranch":"main","name":"mlc-llm","ownerLogin":"krishnaraj36","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2024-04-25T11:06:00.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/45380557?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1723184439.0","currentOid":""},"activityList":{"items":[{"before":null,"after":"31cddcf6d0474671567a3b7320e6982823d27ea4","ref":"refs/heads/cpu_check_device","pushedAt":"2024-08-09T06:20:39.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[CLI] Add CPU check device option\n\nAdded CPU check device option","shortMessageHtmlLink":"[CLI] Add CPU check device option"}},{"before":"c253c610c227c4b93c45fa7a4b3374688ef050ae","after":"d404a9c29d926f06f7924d863713b27a9616b98b","ref":"refs/heads/windows_build_opt","pushedAt":"2024-08-09T06:15:24.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"added window config for kv_cache sch","shortMessageHtmlLink":"added window config for kv_cache sch"}},{"before":null,"after":"c253c610c227c4b93c45fa7a4b3374688ef050ae","ref":"refs/heads/windows_build_opt","pushedAt":"2024-08-09T06:08:02.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[Windows] Windows build target definitions for adreno\n\nAdded windows compilers option for adreno target","shortMessageHtmlLink":"[Windows] Windows build target definitions for adreno"}},{"before":"ec6cc300636e78b93f4cce01c1b6cd49440a0bd2","after":"36055f01fc656dee0297dc36c60393a38ada9171","ref":"refs/heads/main","pushedAt":"2024-08-09T05:58:48.000Z","pushType":"push","commitsCount":275,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[Bench] Json schema dataset (#2775)\n\nThis PR adds the json schema dataset, from\r\nhttps://huggingface.co/datasets/NousResearch/json-mode-eval.","shortMessageHtmlLink":"[Bench] Json schema dataset (mlc-ai#2775)"}},{"before":"2f50f8a71d672802fe4b92238f6461df07a58ac5","after":"f498e3ac34f76ee6376ab0c3384d17b78c35e2aa","ref":"refs/heads/PageKV_attention_opt","pushedAt":"2024-05-08T13:01:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"Increase max thread for android:adreno","shortMessageHtmlLink":"Increase max thread for android:adreno"}},{"before":"9851a07d4ab414f430f63f376e514aedabdc5d54","after":"2f50f8a71d672802fe4b92238f6461df07a58ac5","ref":"refs/heads/PageKV_attention_opt","pushedAt":"2024-05-08T05:14:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"Update kv_cache.py","shortMessageHtmlLink":"Update kv_cache.py"}},{"before":"819b742eb341405d58e24f07025a96c24c900166","after":"9851a07d4ab414f430f63f376e514aedabdc5d54","ref":"refs/heads/PageKV_attention_opt","pushedAt":"2024-05-08T05:06:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"Updated K_local to QK_local","shortMessageHtmlLink":"Updated K_local to QK_local"}},{"before":null,"after":"41dd4dee8e189d30435702c95f210f5635dc4582","ref":"refs/heads/use_fixed_vocab_size","pushedAt":"2024-05-07T07:16:31.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[MODEL] Make constant vocab size for models instead dynamic\n\nUsed model config vocab size in model compilation instead dynamic\nvariable, which degrades the performance of chat application.","shortMessageHtmlLink":"[MODEL] Make constant vocab size for models instead dynamic"}},{"before":null,"after":"819b742eb341405d58e24f07025a96c24c900166","ref":"refs/heads/PageKV_attention_opt","pushedAt":"2024-05-07T07:07:37.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[KVCACHE][TIR] Improved tir schedule for decode tir page attention\n\n 1. Improved tir schedule of page attention (It improved 30% to this\nfunction).\n 2. Enable missing dequant+matmul fusion in ph-2 model","shortMessageHtmlLink":"[KVCACHE][TIR] Improved tir schedule for decode tir page attention"}},{"before":null,"after":"65b19d51c694f5361f4ac1c5a3c2dc3a97751958","ref":"refs/heads/Opt_penality_softmax","pushedAt":"2024-05-07T06:42:53.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[LLM-CHAT] Enable gpu softmax for penality softmax\n\n1. Avoid the cpu softmax for different penality config by\n having copy sync to gpu and use gpu softmax.\n2. Disable decode token time counter for first token.","shortMessageHtmlLink":"[LLM-CHAT] Enable gpu softmax for penality softmax"}},{"before":null,"after":"f9b0ea04906db2e1a1ebb48512453950eaf69f5d","ref":"refs/heads/mlc_opencl_host_ptr","pushedAt":"2024-05-07T06:02:15.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[CMAKE][BUILD] Add user option to enable OpenCL Host ptr","shortMessageHtmlLink":"[CMAKE][BUILD] Add user option to enable OpenCL Host ptr"}},{"before":"e115dde2455711ff62abed377f5611508520ceac","after":"ec6cc300636e78b93f4cce01c1b6cd49440a0bd2","ref":"refs/heads/main","pushedAt":"2024-05-07T05:36:38.000Z","pushType":"push","commitsCount":46,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"Update engine.cc","shortMessageHtmlLink":"Update engine.cc"}},{"before":null,"after":"d0ad3698096503f45636f372f3d21c97a90ae6e4","ref":"refs/heads/mlc_page_attention","pushedAt":"2024-04-25T11:12:49.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"krishnaraj36","name":null,"path":"/krishnaraj36","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45380557?s=80&v=4"},"commit":{"message":"[PYTHON][KVCACHE] Enhance the thread limit for opencl\n\nIt improves 2x time for tir based page attention for opencl adreno.","shortMessageHtmlLink":"[PYTHON][KVCACHE] Enhance the thread limit for opencl"}}],"hasNextPage":false,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOC0wOVQwNjoyMDozOS4wMDAwMDBazwAAAASV8u8Q","startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOC0wOVQwNjoyMDozOS4wMDAwMDBazwAAAASV8u8Q","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wNC0yNVQxMToxMjo0OS4wMDAwMDBazwAAAAQ6EePe"}},"title":"Activity ยท krishnaraj36/mlc-llm"}