
Added compile fixture #37

Draft: wants to merge 32 commits into base branch main

Commits (32)
90d48b8
added compile fixture
markkraay Aug 6, 2024
c3f2312
added compile fixture to where tests
markkraay Aug 6, 2024
1423b20
compile fixture for unary
markkraay Aug 6, 2024
7db02df
added compile fixture for strongly_typed
markkraay Aug 6, 2024
f128221
change `compute` to `func`
markkraay Aug 6, 2024
84469e5
use `eager` instead of `lazy`
markkraay Aug 6, 2024
e56af4a
remove baked in args from compile fixture
markkraay Aug 6, 2024
c7d4757
enable more tests
markkraay Aug 6, 2024
688cf3f
enable compile fixture in more ops
markkraay Aug 6, 2024
546f5d1
enabled compile_fixture for test_unsqueeze
markkraay Aug 12, 2024
cd081d9
Also check shape in test_unsqueeze
markkraay Aug 12, 2024
0e07c83
enable test_slice
markkraay Aug 12, 2024
c194230
fixed bug with filtering kwargs
markkraay Aug 12, 2024
910911e
enabled reshape and reduce
markkraay Aug 12, 2024
4e5a9b2
added comment for allclose
markkraay Aug 12, 2024
e6636d8
try to dynamically add markers for compile / eager
markkraay Aug 13, 2024
eb13a3d
enable test_flip: all tests passing
markkraay Aug 13, 2024
9cf2bc6
enable test_full: failing test_shape_tensor[compile]
markkraay Aug 13, 2024
c35b45f
remove ir dump from test_full
markkraay Aug 13, 2024
8c5ad76
enable test_iota; failing test_iota_from_shape_tensor[compile]
markkraay Aug 13, 2024
18bcefc
remove extra fixture
markkraay Aug 13, 2024
f9f4cb3
enable test_linear; all tests passing
markkraay Aug 13, 2024
82edeaf
enable test_matrix_multiplication; all tests passing
markkraay Aug 13, 2024
0e02f61
enable test_reshape; all tests passing
markkraay Aug 13, 2024
a768de7
enable test_cast: failing test_cast[compile-*] & test_cast_from_bool[…
markkraay Aug 13, 2024
623f54c
enable test_concatenate: all tests pass
markkraay Aug 13, 2024
c998d30
enable & reformat test_plugin: all test passing
markkraay Aug 13, 2024
8ce3f62
enable test_convolution; all tests pass
markkraay Aug 14, 2024
a1d1735
enable test_quantize; filed issue #102
markkraay Aug 14, 2024
7d7de04
enable test_functional; failing many
markkraay Aug 14, 2024
e146aa1
enabled test_conv_transpose; all tests passing
markkraay Aug 14, 2024
0575c5b
fixed test_cast
markkraay Aug 29, 2024
44 changes: 44 additions & 0 deletions tripy/tests/integration/conftest.py
@@ -0,0 +1,44 @@
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest

import tripy as tp


@pytest.fixture(params=["compile", "eager"])
def compile_fixture(request):
Review comment (Collaborator), referencing #102 (comment); a possible extension along these lines is sketched after this file listing:

compile_fixture should be able to specify which args are constants / parameters. For Q/DQ, scale should be Parameters.

    def wrapper(func, *args, **kwargs):
        def get_shape(x: tp.Tensor):
            x.eval()
            return tp.InputInfo(x.trace_tensor.shape, dtype=x.dtype)

        mode = request.param
        if mode == "compile":
            compiler = tp.Compiler(func)
            # Cast appropriate args / kwargs to use `tp.InputInfo`
            compile_args = tuple(map(lambda x: get_shape(x) if isinstance(x, tp.Tensor) else x, list(args)))
            compile_kwargs = dict((k, get_shape(v) if isinstance(v, tp.Tensor) else v) for k, v in kwargs.items())
            compiled_func = compiler.compile(*compile_args, **compile_kwargs)
            # Remove baked in args, aka, only keep tp.Tensor's
            args = tuple(filter(lambda x: isinstance(x, tp.Tensor), args))
            kwargs = dict(filter(lambda kv: isinstance(kv[1], tp.Tensor), kwargs.items()))
            return compiled_func(*args, **kwargs)
        elif mode == "eager":
            return func(*args, **kwargs)

    return wrapper
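
For orientation while reading the test diffs below, here is a minimal sketch of how the fixture is consumed. The test name and values are hypothetical; only the call pattern mirrors the changes in this PR.

import cupy as cp
import tripy as tp


def test_cast_to_int32(compile_fixture):
    # pytest injects the fixture and parametrizes it over "compile" and "eager",
    # so this body runs once per mode.
    x = tp.Tensor([1.0, 2.0, 3.0], dtype=tp.float32)

    # Eager mode simply calls tp.cast(x, tp.int32). Compile mode replaces x with a
    # tp.InputInfo, bakes the non-Tensor dtype argument into the compiled function,
    # and invokes the resulting executable with x alone.
    out = compile_fixture(tp.cast, x, tp.int32)

    assert cp.from_dlpack(out).get().tolist() == [1, 2, 3]

On the review comment above: one way the fixture could let tests mark constant arguments is sketched here. The constant_arg_indices parameter and the surrounding signature are assumptions for illustration, not part of this PR; keyword arguments are omitted for brevity.

import pytest

import tripy as tp


@pytest.fixture(params=["compile", "eager"])
def compile_fixture(request):
    def wrapper(func, *args, constant_arg_indices=()):
        # Hypothetical: positions listed in constant_arg_indices stay baked into the
        # compiled executable (e.g. a Q/DQ scale), even when they are Tensors.
        if request.param == "eager":
            return func(*args)

        def as_input(i, x):
            if isinstance(x, tp.Tensor) and i not in constant_arg_indices:
                x.eval()
                return tp.InputInfo(x.trace_tensor.shape, dtype=x.dtype)
            return x  # constants (Tensor or not) are baked in

        compiled = tp.Compiler(func).compile(*(as_input(i, a) for i, a in enumerate(args)))
        runtime_args = tuple(
            a for i, a in enumerate(args) if isinstance(a, tp.Tensor) and i not in constant_arg_indices
        )
        return compiled(*runtime_args)

    return wrapper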
2 changes: 2 additions & 0 deletions tripy/tests/integration/test_allclose.py
@@ -36,6 +36,8 @@ class TestAllClose:
)
def test_all_close_float32(self, tensor_a, tensor_b, rtol, atol):
np_result = torch.allclose(torch.FloatTensor(tensor_a), torch.FloatTensor(tensor_b), rtol=rtol, atol=atol)
# Cannot use `compile_fixture` here since `tp.Compiler` only works if the output of the function is a Tensor
# and the output of `tp.allclose` is a bool.
tp_result = tp.allclose(
tp.Tensor(tensor_a, dtype=tp.float32), tp.Tensor(tensor_b, dtype=tp.float32), rtol=rtol, atol=atol
)
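To make the comment in test_all_close_float32 concrete, this is the shape of the limitation, assuming the Compiler / InputInfo API used in conftest.py above; it is illustrative only, not code from this PR.

# tp.allclose returns a Python bool, while tp.Compiler traces a function whose
# outputs must be Tensors, so a compiled variant along these lines is expected
# to be rejected rather than yield a usable executable:
compiler = tp.Compiler(tp.allclose)
# compiler.compile(tp.InputInfo((3,), dtype=tp.float32),
#                  tp.InputInfo((3,), dtype=tp.float32))  # expected to fail: non-Tensor output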
21 changes: 14 additions & 7 deletions tripy/tests/integration/test_cast.py
@@ -49,35 +49,42 @@ class TestCast:
# (np.int8, bool),
],
)
def test_cast(self, input_dtype, target_dtype):
def test_cast(self, input_dtype, target_dtype, compile_fixture):
tp_input_dtype = np_to_tripy_dtype(input_dtype)
tp_target_dtype = np_to_tripy_dtype(target_dtype)

# TODO(#222): Integer casts with negative numbers fail in many cases
input_tensor = tp.Tensor([0, 1, 2], dtype=tp_input_dtype)
np_input = cp.from_dlpack(input_tensor).get()
output = tp.cast(input_tensor, tp_target_dtype)

output = compile_fixture(tp.cast, input_tensor, tp_target_dtype)

assert np.array_equal(cp.from_dlpack(output).get(), np_input.astype(target_dtype))

# these dtypes don't have analogues in numpy
@pytest.mark.parametrize("source_dtype", [pytest.param(tp.float8, marks=skip_if_older_than_sm89), tp.int4])
def test_cast_quantized_dtypes_into_bool(self, source_dtype):
def test_cast_quantized_dtypes_into_bool(self, source_dtype, compile_fixture):
# TODO(#223): Using an odd size leads to a strange crash, so can't just use [-1.0, 0.0, 1.0]
input_tensor = tp.Tensor([-1.0, 0.0, 0.0, 1.0], dtype=tp.float32)
q = tp.quantize(input_tensor, scale=1.0, dtype=source_dtype)
output = tp.cast(q, tp.bool)

def func(input):
q = tp.quantize(input, scale=1.0, dtype=source_dtype)
output = tp.cast(q, tp.bool)
return output

output = compile_fixture(func, input_tensor)

assert cp.from_dlpack(output).get().tolist() == [True, False, False, True]

@pytest.mark.parametrize("target_dtype", [np.float32, np.int32, np.int64, np.int8])
def test_cast_from_bool(self, target_dtype):
def test_cast_from_bool(self, target_dtype, compile_fixture):
tp_target_dtype = np_to_tripy_dtype(target_dtype)

# in principle, it is not important what *specific* values we convert to,
# so long as false is mapped to 0 and true to nonzero
input_tensor = tp.Tensor([False, True], dtype=tp.bool)
np_input = cp.from_dlpack(input_tensor).get()
output = tp.cast(input_tensor, tp_target_dtype)
output = compile_fixture(tp.cast, input_tensor, tp_target_dtype)

tp_compare_to_zero = cp.from_dlpack(output).get() == 0
np_compare_to_zero = np_input.astype(target_dtype) == 0
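For readers tracing the compile path of the casts above, this is roughly what the conftest.py fixture does with the non-Tensor dtype argument; a comment-level walkthrough rather than additional test code.

# compile_fixture(tp.cast, input_tensor, tp_target_dtype) in "compile" mode:
#   compile_args  = (tp.InputInfo(input_tensor.trace_tensor.shape, dtype=input_tensor.dtype),
#                    tp_target_dtype)                       # the target dtype is baked in at compile time
#   compiled_func = tp.Compiler(tp.cast).compile(*compile_args)
#   args          = (input_tensor,)                         # non-Tensor args are filtered out before the call
#   return compiled_func(*args)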
8 changes: 4 additions & 4 deletions tripy/tests/integration/test_concatenate.py
@@ -33,9 +33,9 @@ class TestConcatenate:
([(2, 3, 4)], 0),
],
)
def test_concat(self, tensor_shapes, dim):
def test_concat(self, tensor_shapes, dim, compile_fixture):
tensors = [tp.ones(shape) for shape in tensor_shapes]
out = tp.concatenate(tensors, dim=dim)
out = compile_fixture(tp.concatenate, tensors, dim=dim)
assert np.array_equal(
cp.from_dlpack(out).get(), np.concatenate([np.ones(shape) for shape in tensor_shapes], axis=dim)
)
@@ -44,8 +44,8 @@ def test_concat(self, tensor_shapes, dim):
"tensor_shapes, dim",
[([(2, 3, 4), (2, 4, 4)], 0), ([(4, 5, 6), (4, 1, 6)], -1)],
)
def test_negative_concat(self, tensor_shapes, dim):
def test_negative_concat(self, tensor_shapes, dim, compile_fixture):
tensors = [tp.ones(shape) for shape in tensor_shapes]
with helper.raises(tp.TripyException, match=f"not compatible at non-concat index"):
out = tp.concatenate(tensors, dim=dim)
out = compile_fixture(tp.concatenate, tensors, dim=dim)
print(out)
22 changes: 11 additions & 11 deletions tripy/tests/integration/test_conv_transpose.py
@@ -81,7 +81,7 @@ class ConvTestCase:
@pytest.mark.parametrize("torch_dtype,tp_dtype", DTYPES)
class TestConvolution:
@pytest.mark.parametrize("test_case", test_cases_transpose_1d)
def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case):
def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
if not test_case.torch_pad:
test_case.torch_pad = 0
if not test_case.stride:
@@ -129,14 +129,14 @@ def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case):
conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 1e-3
assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_)
assert output.shape == expected.shape

@pytest.mark.parametrize("test_case", test_cases_transpose_2d)
def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case):
def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
if not test_case.torch_pad:
test_case.torch_pad = 0
if not test_case.stride:
@@ -184,14 +184,14 @@ def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case):
conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 1e-3
assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_)
assert output.shape == expected.shape

@pytest.mark.parametrize("test_case", test_cases_transpose_3d)
def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case):
def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
if not test_case.torch_pad:
test_case.torch_pad = 0
if not test_case.stride:
@@ -239,12 +239,12 @@ def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case):
conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)
rtol_ = 1.3e-6 if tp_dtype == tp.float32 else 1.6e-3
assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_)
assert output.shape == expected.shape

def test_transposed_equivalency(self, torch_dtype, tp_dtype):
def test_transposed_equivalency(self, torch_dtype, tp_dtype, compile_fixture):
input_torch = torch.arange(9, dtype=torch.float32, device=torch.device("cuda")).reshape(*(1, 1, 3, 3))
input = tp.cast(tp.Tensor(input_torch), tp_dtype)

@@ -277,8 +277,8 @@ def test_transposed_equivalency(self, torch_dtype, tp_dtype):

expected = conv_layer_torch(input_torch).to(torch_dtype)
expected_transpose = conv_transpose_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output_transpose = conv_transpose_layer(input)
output = compile_fixture(conv_layer, input)
output_transpose = compile_fixture(conv_transpose_layer, input)

rtol_ = 2e-7 if tp_dtype == tp.float32 else 9e-4
assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_)
@@ -291,7 +291,7 @@ def test_transposed_equivalency(self, torch_dtype, tp_dtype):
assert expected.shape == expected_transpose.shape

@pytest.mark.parametrize("test_case", test_cases_transpose_downscale)
def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case):
def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case, compile_fixture):
input_torch = torch.arange(9, dtype=torch.float32, device=torch.device("cuda")).reshape(*(1, 1, 3, 3))
input = tp.cast(tp.Tensor(input_torch), tp_dtype)

@@ -320,7 +320,7 @@ def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case):
conv_layer.weight = tp.cast(tp.Tensor(conv_layer_torch.weight.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 1e-15 if tp_dtype == tp.float32 else 1e-10
assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_)
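One detail worth noting in the transposed-convolution tests above: the callable handed to compile_fixture is a Module instance rather than a free function. Assuming tp.Compiler accepts any callable (the conftest.py wrapper does not distinguish), the two forms below behave the same; the lambda variant is shown only for illustration.

output = compile_fixture(conv_layer, input)                # Module instance as the callable
output = compile_fixture(lambda t: conv_layer(t), input)   # equivalent explicit wrapper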
18 changes: 9 additions & 9 deletions tripy/tests/integration/test_convolution.py
@@ -75,7 +75,7 @@ class ConvTestCase:
@pytest.mark.parametrize("torch_dtype,tp_dtype", DTYPES)
class TestConvolution:
@pytest.mark.parametrize("test_case", test_cases_1d)
def test_convolution_1d(self, torch_dtype, tp_dtype, test_case):
def test_convolution_1d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
if not test_case.torch_pad:
test_case.torch_pad = 0
if not test_case.stride:
@@ -84,7 +84,7 @@ def test_convolution_1d(self, torch_dtype, tp_dtype, test_case):
test_case.dilation = (1,)

input_torch = torch.arange(40, dtype=torch.float32, device=torch.device("cuda")).reshape(*(2, 4, 5))
input = tp.cast(tp.Tensor(input_torch), tp_dtype)
input = tp.cast(tp.Tensor(input_torch, device=tp.device("gpu")), tp_dtype)

conv_layer_torch = torch.nn.Conv1d(
4,
@@ -122,7 +122,7 @@ def test_convolution_1d(self, torch_dtype, tp_dtype, test_case):
conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

# FP32 kernel seems to lose some precision, and FP16 needs to be run in FP32 on torch
rtol_ = 4e-5 if tp_dtype == tp.float32 else 1e-3
@@ -131,7 +131,7 @@ def test_convolution_1d(self, torch_dtype, tp_dtype, test_case):
assert output_torch.shape == expected.shape

@pytest.mark.parametrize("test_case", test_cases_2d)
def test_convolution_2d(self, torch_dtype, tp_dtype, test_case):
def test_convolution_2d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
if not test_case.torch_pad:
test_case.torch_pad = 0
if not test_case.stride:
@@ -178,15 +178,15 @@ def test_convolution_2d(self, torch_dtype, tp_dtype, test_case):
conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype)

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 2e-7 if tp_dtype == tp.float32 else 1.5e-3
output_torch = torch.from_dlpack(output)
assert torch.allclose(output_torch, expected, rtol=rtol_)
assert output_torch.shape == expected.shape

@pytest.mark.parametrize("test_case", test_cases_3d)
def test_convolution_3d(self, torch_dtype, tp_dtype, test_case):
def test_convolution_3d(self, torch_dtype, tp_dtype, test_case, compile_fixture):
pytest.skip("TODO (#260): Fix accuracy bugs in 3D conv")
if not test_case.torch_pad:
test_case.torch_pad = 0
@@ -245,14 +245,14 @@ def test_convolution_3d(self, torch_dtype, tp_dtype, test_case):
return

expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 2e-4 if tp_dtype == tp.float32 else 1.4e-3 # 3d conv has greater accumulation error
output_torch = torch.from_dlpack(output)
assert torch.allclose(output_torch, expected, rtol=rtol_)
assert output_torch.shape == expected.shape

def test_uneven_padding(self, torch_dtype, tp_dtype):
def test_uneven_padding(self, torch_dtype, tp_dtype, compile_fixture):
input_torch = torch.arange(200, dtype=torch.float32, device=torch.device("cuda")).reshape(*(2, 4, 5, 5))
input = tp.cast(tp.Tensor(input_torch), tp_dtype)

@@ -282,7 +282,7 @@ def test_uneven_padding(self, torch_dtype, tp_dtype):

input_torch = torch_pad(input_torch)
expected = conv_layer_torch(input_torch).to(torch_dtype)
output = conv_layer(input)
output = compile_fixture(conv_layer, input)

rtol_ = 2e-7 if tp_dtype == tp.float32 else 2e-3
output_torch = torch.from_dlpack(output)
21 changes: 10 additions & 11 deletions tripy/tests/integration/test_dequantize.py
@@ -29,28 +29,28 @@ class TestDequantize:
@pytest.mark.parametrize(
"dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)]
)
def test_dequantize_int8_per_tensor(self, dtype):
def test_dequantize_int8_per_tensor(self, dtype, compile_fixture):
data = [4, 8]
input_tp = tp.Tensor(data, dtype=tp.int8)
scale = torch.tensor(0.5, dtype=TORCH_DTYPES[dtype])
scale_tp = tp.Tensor(scale, dtype=dtype)
dequantized = tp.dequantize(input_tp, scale_tp, dtype)
dequantized = compile_fixture(tp.dequantize, input_tp, scale_tp, dtype)
expected = torch.tensor(data) * scale
output = torch.from_dlpack(dequantized)
assert torch.allclose(expected, output.to("cpu"))

@pytest.mark.parametrize(
"dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)]
)
def test_dequantize_int8_per_channel(self, dtype):
def test_dequantize_int8_per_channel(self, dtype, compile_fixture):
# TODO: Fix in #153
if dtype == tp.float16:
pytest.skip("TRT does not support fp16->int8 per-channel dequant.")
data = [[4, 8], [4, 8]]
input_tp = tp.Tensor(data, dtype=tp.int8)
scale = torch.tensor([0.8, 0.9], dtype=TORCH_DTYPES[dtype])
scale_tp = tp.Tensor(scale, dtype=dtype)
dequantized = tp.dequantize(input_tp, scale_tp, dtype, dim=0)
dequantized = compile_fixture(tp.dequantize, input_tp, scale_tp, dtype, dim=0)
expected = torch.tensor(data) * scale.reshape((2, 1))
output = torch.from_dlpack(dequantized)
assert torch.allclose(expected, output.to("cpu"))
@@ -60,14 +60,13 @@ def test_dequantize_int8_per_channel(self, dtype):
"dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)]
)
@skip_if_older_than_sm89
def test_dequantize_fp8_per_tensor(self, dtype):
def test_dequantize_fp8_per_tensor(self, dtype, compile_fixture):
data_value = [1.0, 1.0]
input_tp = tp.Tensor(data_value, dtype=tp.float8)
scale = torch.tensor(0.5, dtype=TORCH_DTYPES[dtype])
scale_tp = tp.Tensor(scale, dtype=dtype)
dequantized = tp.dequantize(input_tp, scale_tp, dtype)
dequantized = compile_fixture(tp.dequantize, input_tp, scale_tp, dtype)
assert dequantized.dtype == dtype
print(dequantized)
expected = torch.Tensor(data_value) * scale
output = torch.from_dlpack(dequantized).to(dtype=torch.float32)
assert torch.allclose(expected, output.to("cpu"))
@@ -76,23 +75,23 @@ def test_dequantize_fp8_per_tensor(self, dtype):
"dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)]
)
@skip_if_older_than_sm89
def test_dequantize_fp8_per_channel(self, dtype):
def test_dequantize_fp8_per_channel(self, dtype, compile_fixture):
data_value = [[1.0, 1.0], [1.0, 1.0]]
input_tp = tp.Tensor(data_value, dtype=tp.float8)
scale = torch.tensor([0.8, 0.9], dtype=TORCH_DTYPES[dtype])
scale_tp = tp.Tensor(scale, dtype=dtype)
dequantized = tp.dequantize(input_tp, scale_tp, dtype, dim=0)
dequantized = compile_fixture(tp.dequantize, input_tp, scale_tp, dtype, dim=0)
assert dequantized.dtype == dtype
print(dequantized)
expected = torch.Tensor(data_value) * scale.reshape((2, 1))
output = torch.from_dlpack(dequantized).to(dtype=torch.float32)
assert torch.allclose(expected, output.to("cpu"))

def test_negative_non_constant_scale(self):
def test_negative_non_constant_scale(self, compile_fixture):
data = [[4, 8], [4, 8]]
input = tp.Tensor(data, dtype=tp.int8)
scale = tp.ones((2,))
dequantized = tp.dequantize(input, scale, tp.float32, dim=0)
dequantized = compile_fixture(tp.dequantize, input, scale, tp.float32, dim=0)
with raises(
tp.TripyException,
match="Scale must be a constant tensor in dequantize op",