Test HF's implementation of Qwen 2 model (#1406)
IvanYashchuk authored Nov 13, 2024
1 parent 0a625aa commit 2b8284d
Showing 1 changed file with 45 additions and 0 deletions: thunder/tests/test_networks.py
@@ -400,6 +400,51 @@ def test_thunderfx_mistral_nemo_small():
    assert th_backend.subgraph_infos, "Should have at least 1 subgraph"


@thunder.tests.framework.requiresCUDA
def test_hf_qwen2():
    from thunder.dynamo import ThunderCompiler
    from transformers import Qwen2Config, Qwen2ForCausalLM

    # https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/blob/main/config.json
    configuration = Qwen2Config(
        # Qwen2.5-7B-Instruct uses Grouped-Query Attention, while the default
        # config uses Multi-Head Attention
        num_attention_heads=28,
        num_key_value_heads=4,
        # Scaled down for testing
        hidden_size=56,
        vocab_size=16,
        max_position_embeddings=32,
    )
    configuration.num_hidden_layers = 1
    with torch.device("cuda"):
        model = Qwen2ForCausalLM(configuration).to(torch.bfloat16)

    # thunder.jit doesn't work with Qwen2, so we use torch.compile
    # https://github.com/Lightning-AI/lightning-thunder/issues/1405
    backend = ThunderCompiler()
    compiled_model = torch.compile(model, backend=backend, fullgraph=True)

    input_ids = torch.randint(0, configuration.vocab_size, (1, configuration.max_position_embeddings), device="cuda")
    ref_output = model(input_ids=input_ids, labels=input_ids)
    ref_loss = ref_output.loss

    compiled_output = compiled_model(input_ids=input_ids, labels=input_ids)
    compiled_loss = compiled_output.loss

    # Less strict tolerance probably due to different type promotion order for bfloat16
    # TODO: Investigate why the loss is different
    # https://github.com/Lightning-AI/lightning-thunder/issues/1407
    torch.testing.assert_close(compiled_loss, ref_loss, rtol=1e-4, atol=1e-4)

    assert len(backend.subgraph_infos) == 1, "Should have exactly 1 subgraph because of fullgraph=True"
    loss_grad = torch.randn_like(compiled_loss)

    grads_ref = torch.autograd.grad(ref_loss, model.parameters(), grad_outputs=loss_grad)
    grads_compiled = torch.autograd.grad(compiled_loss, model.parameters(), grad_outputs=loss_grad)
    torch.testing.assert_close(grads_ref, grads_compiled, rtol=1e-2, atol=1e-2)


LLAMA_3_2_1B_CFG = {
    "architectures": ["LlamaForCausalLM"],
    "attention_bias": False,
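For context on the `thunder.jit` comment in the test: the more direct route would be to trace the model with Thunder's own entry point rather than going through `torch.compile` with the `ThunderCompiler` backend. A rough sketch of that path, reusing the scaled-down configuration from the test, is shown below; it is illustrative only (issue #1405 reports it as not working for Qwen2 at the time of this commit) and is not part of the change:

    import torch
    import thunder
    from transformers import Qwen2Config, Qwen2ForCausalLM

    # Illustrative sketch only, not part of this commit: tracing the model
    # directly with thunder.jit instead of torch.compile + ThunderCompiler.
    configuration = Qwen2Config(
        num_attention_heads=28,
        num_key_value_heads=4,
        hidden_size=56,
        vocab_size=16,
        max_position_embeddings=32,
    )
    configuration.num_hidden_layers = 1
    with torch.device("cuda"):
        model = Qwen2ForCausalLM(configuration).to(torch.bfloat16)

    jitted_model = thunder.jit(model)  # reported to fail for Qwen2, see issue #1405
    input_ids = torch.randint(0, configuration.vocab_size, (1, 8), device="cuda")
    output = jitted_model(input_ids=input_ids, labels=input_ids)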
