From 600128ccf1129160ed03a3148e14befbde2f2503 Mon Sep 17 00:00:00 2001 From: Ther <1329438302@qq.com> Date: Sun, 9 Apr 2023 21:51:00 +0800 Subject: [PATCH 1/2] fix: change /data to ./data --- datautils.py | 12 ++++++------ quantize/reorder_layer_norm.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datautils.py b/datautils.py index 9e493b3..1194bbf 100644 --- a/datautils.py +++ b/datautils.py @@ -12,8 +12,8 @@ def set_seed(seed): def get_wikitext2(nsamples, seed, seqlen, model,cache_dir): print("get_wikitext2") from datasets import load_dataset - traindata = load_dataset('wikitext', 'wikitext-2-raw-v1',cache_dir='/datasets/tmp/wikitext/', split='train') - testdata = load_dataset('wikitext', 'wikitext-2-raw-v1',cache_dir='/datasets/tmp/wikitext/', split='test') + traindata = load_dataset('wikitext', 'wikitext-2-raw-v1',cache_dir='./datasets/tmp/wikitext/', split='train') + testdata = load_dataset('wikitext', 'wikitext-2-raw-v1',cache_dir='./datasets/tmp/wikitext/', split='test') from transformers import AutoTokenizer if "llama" in model: @@ -39,8 +39,8 @@ def get_wikitext2(nsamples, seed, seqlen, model,cache_dir): def get_ptb(nsamples, seed, seqlen, model,cache_dir): print("get_ptb") from datasets import load_dataset - traindata = load_dataset('ptb_text_only', 'penn_treebank',cache_dir='/datasets/tmp/ptb_text_only/', split='train') - valdata = load_dataset('ptb_text_only', 'penn_treebank',cache_dir='/datasets/tmp/ptb_text_only/', split='validation') + traindata = load_dataset('ptb_text_only', 'penn_treebank',cache_dir='./datasets/tmp/ptb_text_only/', split='train') + valdata = load_dataset('ptb_text_only', 'penn_treebank',cache_dir='./datasets/tmp/ptb_text_only/', split='validation') from transformers import AutoTokenizer if "llama" in model: @@ -67,10 +67,10 @@ def get_c4(nsamples, seed, seqlen, model,cache_dir): print("get_c4") from datasets import load_dataset traindata = load_dataset( - 'allenai/c4', 'allenai--c4', cache_dir='/datasets/tmp/allenai--c4/', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train' + 'allenai/c4', 'allenai--c4', cache_dir='./datasets/tmp/allenai--c4/', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train' ) valdata = load_dataset( - 'allenai/c4', 'allenai--c4', cache_dir='/datasets/tmp/allenai--c4/',data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'}, split='validation' + 'allenai/c4', 'allenai--c4', cache_dir='./datasets/tmp/allenai--c4/',data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'}, split='validation' ) from transformers import AutoTokenizer diff --git a/quantize/reorder_layer_norm.py b/quantize/reorder_layer_norm.py index be7fab1..50b6b32 100644 --- a/quantize/reorder_layer_norm.py +++ b/quantize/reorder_layer_norm.py @@ -3,7 +3,7 @@ from quantize.quantizer import UniformAffineQuantizer from torch.utils.cpp_extension import load -USE_CUDA=False +USE_CUDA=True if USE_CUDA: reorder_layer_norm_fp16 = load( 'reorder_layernorm_fp16', ['./cuda/reorder_layernorm.cu'], From 978d6caacef572b638593d7ff8eb1f7dcf9775d3 Mon Sep 17 00:00:00 2001 From: Ther <1329438302@qq.com> Date: Mon, 10 Apr 2023 00:15:56 +0800 Subject: [PATCH 2/2] fix: fix some typos in cuda --- cuda/test.py | 5 ++--- quantize/reorder_layer_norm.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cuda/test.py b/cuda/test.py index a7099f3..c326732 100644 --- a/cuda/test.py +++ b/cuda/test.py @@ -1,7 +1,7 @@ from torch.utils.cpp_extension import load import torch reorder_layer_norm_fp16 = load( - 'reorder_layernorm_fp16', ['reorder_layernorm.cu'], + 'reorder_layer_norm_fp16', ['reorder_layernorm.cu'], extra_cuda_cflags=['--use_fast_math'], extra_ldflags=["-L/usr/local/cuda/lib64/"]) @@ -16,8 +16,7 @@ def add_cuda_op(x, y,w,b): index[1]=31 index[31]=2 dst_index=torch.argsort(index) - var,mean=torch.var_mean(x,0) - reorder_layer_norm_fp16.forward(x, output,mean,var,w,b,dst_index) + reorder_layer_norm_fp16.forward(x, output,w,b,dst_index) return output bias=torch.arange(0,32,1,device='cuda').half() diff --git a/quantize/reorder_layer_norm.py b/quantize/reorder_layer_norm.py index 50b6b32..f6affcb 100644 --- a/quantize/reorder_layer_norm.py +++ b/quantize/reorder_layer_norm.py @@ -3,10 +3,10 @@ from quantize.quantizer import UniformAffineQuantizer from torch.utils.cpp_extension import load -USE_CUDA=True +USE_CUDA=False if USE_CUDA: reorder_layer_norm_fp16 = load( - 'reorder_layernorm_fp16', ['./cuda/reorder_layernorm.cu'], + 'reorder_layer_norm_fp16', ['./cuda/reorder_layernorm.cu'], extra_cuda_cflags=['--use_fast_math'], extra_ldflags=["-L/usr/local/cuda/lib64/"])