diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3fc6de32..2d5aced4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,6 +18,7 @@ jobs:
     env:
       JULIA_NUM_THREADS: ${{ matrix.julia-threads }}
       JL_TRF_TEST_TKR: ${{ matrix.test-hgf-tkr }}
+      HUGGINGFACEHUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
diff --git a/test/huggingface/load.jl b/test/huggingface/load.jl
index 53fcc10e..c8f51523 100644
--- a/test/huggingface/load.jl
+++ b/test/huggingface/load.jl
@@ -2,19 +2,19 @@ using Logging
 using Transformers.HuggingFace
 
 model_list = Dict([
-    :bert => :[
-        Model, ForPreTraining, LMHeadModel, ForMaskedLM, ForNextSentencePrediction,
-        ForSequenceClassification, ForTokenClassification, ForQuestionAnswering,
-    ].args,
-    :roberta => :[
-        Model, ForMaskedLM, ForCausalLM, ForSequenceClassification, ForTokenClassification, ForQuestionAnswering,
-    ].args,
-    :gpt2 => [:Model, :LMHeadModel],
-    :t5 => [:Model, :ForConditionalGeneration],
-    :gpt_neo => [:Model, :ForCausalLM],
-    :gptj => [:Model, :ForCausalLM],
-    :gpt_neox => [:Model, :ForCausalLM],
-    :bloom => [:Model, :ForCausalLM],
+    # :bert => :[
+    #     Model, ForPreTraining, LMHeadModel, ForMaskedLM, ForNextSentencePrediction,
+    #     ForSequenceClassification, ForTokenClassification, ForQuestionAnswering,
+    # ].args,
+    # :roberta => :[
+    #     Model, ForMaskedLM, ForCausalLM, ForSequenceClassification, ForTokenClassification, ForQuestionAnswering,
+    # ].args,
+    # :gpt2 => [:Model, :LMHeadModel],
+    # :t5 => [:Model, :ForConditionalGeneration],
+    # :gpt_neo => [:Model, :ForCausalLM],
+    # :gptj => [:Model, :ForCausalLM],
+    # :gpt_neox => [:Model, :ForCausalLM],
+    # :bloom => [:Model, :ForCausalLM],
     # :llama => [:Model, :ForCausalLM], No hf-internal-testing/tiny-random-$hgf_type_name
 ])
diff --git a/test/huggingface/tokenizer.jl b/test/huggingface/tokenizer.jl
index 021e3acb..abb5b371 100644
--- a/test/huggingface/tokenizer.jl
+++ b/test/huggingface/tokenizer.jl
@@ -1,3 +1,4 @@
+@assert !isnothing(HFHUB_Token)
 using Artifacts, LazyArtifacts
 const artifact_dir = @artifact_str("xnli_dev")
 const xnli = joinpath(artifact_dir, "xnli-dev.txt")
@@ -22,18 +23,18 @@ macro tryrun(ex, msg = nothing)
 end
 
 function test_tokenizer(name, corpus; to = TimerOutput())
-    global hgf_trf
+    global hgf_trf, HFHUB_Token
     @info "Validate $name tokenizer"
     @info "Load $name configure file in Julia"
     config = @tryrun begin
         @timeit to "jlload cfg" begin
-            cfg = HuggingFace.load_config(name)
+            cfg = HuggingFace.load_config(name; auth_token = HFHUB_Token)
             HuggingFace.HGFConfig(cfg; layer_norm_eps = 1e-9, layer_norm_epsilon = 1e-9)
         end
     end "Failed to load $name configure file in Julia, probably unsupported"
     @info "Load $name configure file in Python"
     pyconfig = @tryrun begin
-        @timeit to "pyload cfg" hgf_trf.AutoConfig.from_pretrained(name,
+        @timeit to "pyload cfg" hgf_trf.AutoConfig.from_pretrained(name, token = HFHUB_Token,
             layer_norm_eps = 1e-9, layer_norm_epsilon = 1e-9)
     end "Failed to load $name configure file in Python, probably unsupported"
     vocab_size = if haskey(config, :vocab_size)
@@ -44,12 +45,12 @@ function test_tokenizer(name, corpus; to = TimerOutput())
     end
     @info "Loading $name tokenizer in Python"
     hgf_tkr = @tryrun begin
-        @timeit to "pyload tkr" hgf_trf.AutoTokenizer.from_pretrained(name, config = pyconfig)
+        @timeit to "pyload tkr" hgf_trf.AutoTokenizer.from_pretrained(name, config = pyconfig, token = HFHUB_Token)
     end "Failed to load $name tokenizer in Python"
     @info "Python $name tokenizer loaded successfully"
     @info "Loading $name tokenizer in Julia"
     tkr = @tryrun begin
-        @timeit to "jlload tkr" HuggingFace.load_tokenizer(name; config)
+        @timeit to "jlload tkr" HuggingFace.load_tokenizer(name; config, auth_token = HFHUB_Token)
     end "Failed to load $name tokenizer in Julia"
     @info "Julia $name tokenizer loaded successfully"
     @info "Testing: $name Tokenizer"
@@ -113,8 +114,9 @@ end
 @testset "HuggingFace Tokenizer" begin
     corpus = readlines(xnli)
     for name in [
-        "bert-base-cased", "bert-base-uncased", "roberta-base", "gpt2", "t5-small", "google/flan-t5-xl",
-        "EleutherAI/pythia-70m", "databricks/dolly-v2-3b", "bigscience/bloom-560m", "TheBloke/Llama-2-7B-Chat-GPTQ",
+        # "bert-base-cased", "bert-base-uncased", "roberta-base", "gpt2", "t5-small", "google/flan-t5-xl",
+        # "EleutherAI/pythia-70m", "databricks/dolly-v2-3b", "bigscience/bloom-560m",
+        "meta-llama/Llama-2-7b-chat-hf"
     ]
         @testset "$name Tokenizer" begin
             to = TimerOutput()
diff --git a/test/runtests.jl b/test/runtests.jl
index 0ace37c0..bf071c1d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,6 +11,8 @@ using Flux: gradient
 
 using CUDA
 
+const HFHUB_Token = get(ENV, "HUGGINGFACEHUB_TOKEN", nothing)
+
 function envget(var)
     e = get(ENV, var, false)
     e isa Bool && return e
@@ -37,7 +39,7 @@ dones(arg...) = ones(arg...) |> device
 dzeros(arg...) = zeros(arg...) |> device
 
 const tests = [
-    "tokenizer",
+    # "tokenizer",
     "huggingface",
 ]
 
@@ -55,6 +57,6 @@ Random.seed!(0)
             end
         end
     end
-    include("loss.jl")
-    include("grad.jl")
+    # include("loss.jl")
+    # include("grad.jl")
 end