diff --git a/models/experimental/functional_yolov4/reference/downsample1.py b/models/experimental/functional_yolov4/reference/downsample1.py
new file mode 100644
index 00000000000..2addb26824d
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsample1.py
@@ -0,0 +1,73 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+
+class DownSample1(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.c1 = nn.Conv2d(3, 32, 3, 1, 1, bias=False)
+        self.b1 = nn.BatchNorm2d(32)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c2 = nn.Conv2d(32, 64, 3, 2, 1, bias=False)
+        self.b2 = nn.BatchNorm2d(64)
+
+        self.c3 = nn.Conv2d(64, 64, 1, 1, 0, bias=False)
+        self.b3 = nn.BatchNorm2d(64)
+
+        self.c4 = nn.Conv2d(64, 64, 1, 1, 0, bias=False)
+        self.b4 = nn.BatchNorm2d(64)
+
+        self.c5 = nn.Conv2d(64, 32, 1, 1, 0, bias=False)
+        self.b5 = nn.BatchNorm2d(32)
+
+        self.c6 = nn.Conv2d(32, 64, 3, 1, 1, bias=False)
+        self.b6 = nn.BatchNorm2d(64)
+
+        self.c7 = nn.Conv2d(64, 64, 1, 1, 0, bias=False)
+        self.b7 = nn.BatchNorm2d(64)
+
+        self.c8 = nn.Conv2d(128, 64, 1, 1, 0, bias=False)
+        self.b8 = nn.BatchNorm2d(64)
+
+    def forward(self, input: torch.Tensor):
+        x1 = self.c1(input)
+        x1_b = self.b1(x1)
+        x1_m = self.relu(x1_b)
+
+        x2 = self.c2(x1_m)
+        x2_b = self.b2(x2)
+        x2_m = self.relu(x2_b)
+
+        x3 = self.c3(x2_m)
+        x3_b = self.b3(x3)
+        x3_m = self.relu(x3_b)
+
+        x4 = self.c4(x2_m)
+        x4_b = self.b4(x4)
+        x4_m = self.relu(x4_b)
+
+        x5 = self.c5(x4_m)
+        x5_b = self.b5(x5)
+        x5_m = self.relu(x5_b)
+
+        x6 = self.c6(x5_m)
+        x6_b = self.b6(x6)
+        x6_m = self.relu(x6_b)
+        x6_m = x6_m + x4_m
+
+        x7 = self.c7(x6_m)
+        x7_b = self.b7(x7)
+        x7_m = self.relu(x7_b)
+        x7_m = torch.cat([x7_m, x3_m], dim=1)
+
+        x8 = self.c8(x7_m)
+        x8_b = self.b8(x8)
+        x8_m = self.relu(x8_b)
+
+        return x8_m
diff --git a/models/experimental/functional_yolov4/reference/downsample2.py b/models/experimental/functional_yolov4/reference/downsample2.py
new file mode 100644
index 00000000000..39f9fd4b52d
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsample2.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.reference.resblock import ResBlock
+
+
+class DownSample2(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.c1 = nn.Conv2d(64, 128, 3, 2, 1, bias=False)
+        self.b1 = nn.BatchNorm2d(128)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c2 = nn.Conv2d(128, 64, 1, 1, bias=False)
+        self.b2 = nn.BatchNorm2d(64)
+
+        self.c3 = nn.Conv2d(128, 64, 1, 1, bias=False)
+        self.b3 = nn.BatchNorm2d(64)
+
+        self.res = ResBlock(64, 2)
+
+        self.c4 = nn.Conv2d(64, 64, 1, 1, bias=False)
+        self.b4 = nn.BatchNorm2d(64)
+
+        self.c5 = nn.Conv2d(128, 128, 1, 1, bias=False)
+        self.b5 = nn.BatchNorm2d(128)
+
+    def forward(self, input: torch.Tensor):
+        x1 = self.c1(input)
+        x1_b = self.b1(x1)
+        x1_m = self.relu(x1_b)
+
+        x2 = self.c2(x1_m)
+        x2_b = self.b2(x2)
+        x2_m = self.relu(x2_b)
+
+        x3 = self.c3(x1_m)
+        x3_b = self.b3(x3)
+        x3_m = self.relu(x3_b)
+
+        r1 = self.res(x3_m)
+
+        x4 = self.c4(r1)
+        x4_b = self.b4(x4)
+        x4_m = self.relu(x4_b)
+
+        x4_m = torch.cat([x4_m, x2_m], dim=1)
+
+        x5 = self.c5(x4_m)
+        x5_b = self.b5(x5)
+        x5_m = self.relu(x5_b)
+
+        return x5_m
diff --git a/models/experimental/functional_yolov4/reference/downsample3.py b/models/experimental/functional_yolov4/reference/downsample3.py
new file mode 100644
index 00000000000..8a8a15ea162
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsample3.py
@@ -0,0 +1,56 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.reference.resblock import ResBlock
+
+
+class DownSample3(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.c1 = nn.Conv2d(128, 256, 3, 2, 1, bias=False)
+        self.b1 = nn.BatchNorm2d(256)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c2 = nn.Conv2d(256, 128, 1, 1, bias=False)
+        self.b2 = nn.BatchNorm2d(128)
+
+        self.c3 = nn.Conv2d(256, 128, 1, 1, bias=False)
+        self.b3 = nn.BatchNorm2d(128)
+
+        self.res = ResBlock(128, 8)
+
+        self.c4 = nn.Conv2d(128, 128, 1, 1, bias=False)
+        self.b4 = nn.BatchNorm2d(128)
+
+        self.c5 = nn.Conv2d(256, 256, 1, 1, bias=False)
+        self.b5 = nn.BatchNorm2d(256)
+
+    def forward(self, input: torch.Tensor):
+        x1 = self.c1(input)
+        x1_b = self.b1(x1)
+        x1_m = self.relu(x1_b)
+
+        x2 = self.c2(x1_m)
+        x2_b = self.b2(x2)
+        x2_m = self.relu(x2_b)
+
+        x3 = self.c3(x1_m)
+        x3_b = self.b3(x3)
+        x3_m = self.relu(x3_b)
+
+        r1 = self.res(x3_m)
+
+        x4 = self.c4(r1)
+        x4_b = self.b4(x4)
+        x4_m = self.relu(x4_b)
+
+        x4_m = torch.cat([x4_m, x2_m], dim=1)
+
+        x5 = self.c5(x4_m)
+        x5_b = self.b5(x5)
+        x5_m = self.relu(x5_b)
+        return x5_m
diff --git a/models/experimental/functional_yolov4/reference/downsample4.py b/models/experimental/functional_yolov4/reference/downsample4.py
new file mode 100644
index 00000000000..4538c9293c5
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsample4.py
@@ -0,0 +1,58 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.reference.resblock import ResBlock
+
+
+class DownSample4(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.c1 = nn.Conv2d(256, 512, 3, 2, 1, bias=False)
+        self.b1 = nn.BatchNorm2d(512)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c2 = nn.Conv2d(512, 256, 1, 1, 0, bias=False)
+        self.b2 = nn.BatchNorm2d(256)
+
+        self.c3 = nn.Conv2d(512, 256, 1, 1, 0, bias=False)
+        self.b3 = nn.BatchNorm2d(256)
+
+        self.res = ResBlock(256, 8)
+
+        self.c4 = nn.Conv2d(256, 256, 1, 1, 0, bias=False)
+        self.b4 = nn.BatchNorm2d(256)
+
+        self.c5 = nn.Conv2d(512, 512, 1, 1, 0, bias=False)
+        self.b5 = nn.BatchNorm2d(512)
+
+    def forward(self, input: torch.Tensor):
+        x1 = self.c1(input)
+        x1_b = self.b1(x1)
+        x1_m = self.relu(x1_b)
+
+        x2 = self.c2(x1_m)
+        x2_b = self.b2(x2)
+        x2_m = self.relu(x2_b)
+
+        x3 = self.c3(x1_m)
+        x3_b = self.b3(x3)
+        x3_m = self.relu(x3_b)
+
+        # resblock
+        r = self.res(x3_m)
+
+        x4 = self.c4(r)
+        x4_b = self.b4(x4)
+        x4_m = self.relu(x4_b)
+
+        x4_m = torch.cat([x4_m, x2_m], dim=1)
+
+        x5 = self.c5(x4_m)
+        x5_b = self.b5(x5)
+        x5_m = self.relu(x5_b)
+
+        return x5_m
diff --git a/models/experimental/functional_yolov4/reference/downsample5.py b/models/experimental/functional_yolov4/reference/downsample5.py
new file mode 100644
index 00000000000..5e2200346ac
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsample5.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.reference.resblock import ResBlock
+
+
+class DownSample5(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.c1 = nn.Conv2d(512, 1024, 3, 2, 1, bias=False)
+        self.b1 = nn.BatchNorm2d(1024)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c2 = nn.Conv2d(1024, 512, 1, 1, bias=False)
+        self.b2 = nn.BatchNorm2d(512)
+
+        self.c3 = nn.Conv2d(1024, 512, 1, 1, bias=False)
+        self.b3 = nn.BatchNorm2d(512)
+
+        self.res = ResBlock(512, 4)
+
+        self.c4 = nn.Conv2d(512, 512, 1, 1, bias=False)
+        self.b4 = nn.BatchNorm2d(512)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.c5 = nn.Conv2d(1024, 1024, 1, 1, bias=False)
+        self.b5 = nn.BatchNorm2d(1024)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, input: torch.Tensor):
+        x1 = self.c1(input)
+        x1_b = self.b1(x1)
+        x1_m = self.relu(x1_b)
+
+        x2 = self.c2(x1_m)
+        x2_b = self.b2(x2)
+        x2_m = self.relu(x2_b)
+
+        x3 = self.c3(x1_m)
+        x3_b = self.b3(x3)
+        x3_m = self.relu(x3_b)
+
+        # resblock
+        r = self.res(x3_m)
+
+        x4 = self.c4(r)
+        x4_b = self.b4(x4)
+        x4_m = self.relu(x4_b)
+
+        x4_m = torch.cat([x4_m, x2_m], dim=1)
+
+        x5 = self.c5(x4_m)
+        x5_b = self.b5(x5)
+        x5_m = self.relu(x5_b)
+
+        return x5_m
diff --git a/models/experimental/functional_yolov4/reference/downsamples.py b/models/experimental/functional_yolov4/reference/downsamples.py
new file mode 100644
index 00000000000..76a9ddb9c49
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/downsamples.py
@@ -0,0 +1,27 @@
+from models.experimental.functional_yolov4.reference.downsample1 import DownSample1
+from models.experimental.functional_yolov4.reference.downsample2 import DownSample2
+from models.experimental.functional_yolov4.reference.downsample3 import DownSample3
+from models.experimental.functional_yolov4.reference.downsample4 import DownSample4
+from models.experimental.functional_yolov4.reference.downsample5 import DownSample5
+
+import torch
+import torch.nn as nn
+
+
+class DownSamples(nn.Module):
+    def __init__(self):
+        super(DownSamples, self).__init__()
+        self.downsample1 = DownSample1()
+        self.downsample2 = DownSample2()
+        self.downsample3 = DownSample3()
+        self.downsample4 = DownSample4()
+        self.downsample5 = DownSample5()
+
+    def forward(self, input: torch.Tensor):
+        output = self.downsample1(input)
+        output = self.downsample2(output)
+        output = self.downsample3(output)
+        output = self.downsample4(output)
+        output = self.downsample5(output)
+
+        return output
diff --git a/models/experimental/functional_yolov4/reference/resblock.py b/models/experimental/functional_yolov4/reference/resblock.py
new file mode 100644
index 00000000000..dbd65796983
--- /dev/null
+++ b/models/experimental/functional_yolov4/reference/resblock.py
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch.nn as nn
+
+
+class ResBlock(nn.Module):
+    def __init__(self, ch, nblocks=1, shortcut=True):
+        super().__init__()
+        self.shortcut = shortcut
+        self.module_list = nn.ModuleList()
+        for i in range(nblocks):
+            conv1 = nn.Conv2d(ch, ch, 1, 1, 0, bias=False)
+            bn1 = nn.BatchNorm2d(ch)
+            relu1 = nn.ReLU(inplace=True)
+            conv2 = nn.Conv2d(ch, ch, 3, 1, 1, bias=False)
+            bn2 = nn.BatchNorm2d(ch)
+            relu2 = nn.ReLU(inplace=True)
+            resblock_one = nn.ModuleList([conv1, bn1, relu1, conv2, bn2, relu2])
+            self.module_list.append(resblock_one)
+
+    def forward(self, x):
+        for module in self.module_list:
+            h = x
+            for res in module:
+                h = res(h)
+            x = x + h if self.shortcut else h
+        return x
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsample1.py b/models/experimental/functional_yolov4/tt/ttnn_downsample1.py
new file mode 100644
index 00000000000..6939b35aa7f
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsample1.py
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model
+
+import ttnn
+import tt_lib
+
+
+class TtDownSample1:
+    def __init__(
+        self,
+        parameters,
+    ) -> None:
+        self.c1 = parameters.c1
+        self.c2 = parameters.c2
+        self.c3 = parameters.c3
+        self.c4 = parameters.c4
+        self.c5 = parameters.c5
+        self.c6 = parameters.c6
+        self.c7 = parameters.c7
+        self.c8 = parameters.c8
+
+    def __call__(self, device, input_tensor):
+        input_tensor = input_tensor.to(device, self.c1.conv.input_sharded_memory_config)
+
+        output_tensor = self.c1(input_tensor)
+        output_tensor = self.c2(output_tensor)
+        output_tensor_c2 = output_tensor
+        output_tensor = self.c3(output_tensor)
+
+        output_tensor_c3 = output_tensor
+        output_tensor = self.c4(output_tensor_c2)
+
+        output_tensor_c4 = output_tensor
+        output_tensor = self.c5(output_tensor)
+        output_tensor = self.c6(output_tensor)
+
+        output_tensor = output_tensor + output_tensor_c4
+        output_tensor = self.c7(output_tensor)
+
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        output_tensor = ttnn.concat([output_tensor, output_tensor_c3], dim=3)
+
+        output_tensor = tt_lib.tensor.interleaved_to_sharded(output_tensor, self.c8.conv.input_sharded_memory_config)
+        output_tensor = self.c8(output_tensor)
+
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsample2.py b/models/experimental/functional_yolov4/tt/ttnn_downsample2.py
new file mode 100644
index 00000000000..ab7e98aaffa
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsample2.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model
+
+import ttnn
+import tt_lib
+from models.experimental.functional_yolov4.tt.ttnn_resblock import TtResBlock
+
+
+class TtDownSample2:
+    def __init__(
+        self,
+        parameters,
+    ) -> None:
+        self.c1 = parameters.c1
+        self.c2 = parameters.c2
+        self.c3 = parameters.c3
+        self.res = TtResBlock(parameters.res, 2, True)
+        self.c4 = parameters.c4
+        self.c5 = parameters.c5
+
+    def __call__(self, device, input_tensor):
+        input_tensor = input_tensor.to(device, self.c1.conv.input_sharded_memory_config)
+
+        output_tensor = self.c1(input_tensor)
+        output_tensor_c1 = output_tensor
+        output_tensor = self.c2(output_tensor)
+        output_tensor_c2 = output_tensor
+        output_tensor = self.c3(output_tensor_c1)
+        output_tensor = self.res(device, output_tensor)
+
+        output_tensor = output_tensor.to(device, self.c4.conv.input_sharded_memory_config)
+        output_tensor = self.c4(output_tensor)
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        output_tensor = ttnn.concat([output_tensor, output_tensor_c2], dim=3)
+
+        output_tensor = tt_lib.tensor.interleaved_to_sharded(output_tensor, self.c5.conv.input_sharded_memory_config)
+        output_tensor = self.c5(output_tensor)
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsample3.py b/models/experimental/functional_yolov4/tt/ttnn_downsample3.py
new file mode 100644
index 00000000000..44c1eb1bd57
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsample3.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.tt.ttnn_resblock import TtResBlock
+
+from ttnn.model_preprocessing import preprocess_model
+
+import ttnn
+import tt_lib
+
+
+class TtDownSample3:
+    def __init__(
+        self,
+        parameters,
+    ) -> None:
+        self.c1 = parameters.c1
+        self.c2 = parameters.c2
+        self.c3 = parameters.c3
+        self.res = TtResBlock(parameters.res, 8, True)
+        self.c4 = parameters.c4
+        self.c5 = parameters.c5
+
+    def __call__(self, device, input_tensor):
+        input_tensor = input_tensor.to(device, self.c1.conv.input_sharded_memory_config)
+
+        output_tensor_c1 = self.c1(input_tensor)
+        output_tensor_c2 = self.c2(output_tensor_c1)
+        output_tensor = self.c3(output_tensor_c1)
+
+        output_tensor = self.res(device, output_tensor)
+        output_tensor = output_tensor.to(device, self.c4.conv.input_sharded_memory_config)
+
+        output_tensor = self.c4(output_tensor)
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        output_tensor = ttnn.concat([output_tensor, output_tensor_c2], dim=3)
+        output_tensor = tt_lib.tensor.interleaved_to_sharded(output_tensor, self.c5.conv.input_sharded_memory_config)
+        output_tensor = self.c5(output_tensor)
+
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsample4.py b/models/experimental/functional_yolov4/tt/ttnn_downsample4.py
new file mode 100644
index 00000000000..cad0b16678b
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsample4.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.tt.ttnn_resblock import TtResBlock
+from ttnn.model_preprocessing import preprocess_model
+
+import ttnn
+import tt_lib
+
+
+class TtDownSample4:
+    def __init__(
+        self,
+        parameters,
+    ) -> None:
+        self.c1 = parameters.c1
+        self.c2 = parameters.c2
+        self.c3 = parameters.c3
+        self.res = TtResBlock(parameters.res, 8, True)
+        self.c4 = parameters.c4
+        self.c5 = parameters.c5
+
+    def __call__(self, device, input_tensor):
+        input_tensor = input_tensor.to(device, self.c1.conv.input_sharded_memory_config)
+        output_tensor = self.c1(input_tensor)
+        output_tensor_c1 = output_tensor
+        output_tensor = self.c2(output_tensor)
+        output_tensor_c2 = output_tensor
+        output_tensor = self.c3(output_tensor_c1)
+        output_tensor = self.res(device, output_tensor)
+        output_tensor = output_tensor.to(device, self.c4.conv.input_sharded_memory_config)
+        output_tensor = self.c4(output_tensor)
+
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        output_tensor = ttnn.concat([output_tensor, output_tensor_c2], dim=3)
+        output_tensor = output_tensor.to(device, self.c5.conv.input_sharded_memory_config)
+
+        output_tensor = self.c5(output_tensor)
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsample5.py b/models/experimental/functional_yolov4/tt/ttnn_downsample5.py
new file mode 100644
index 00000000000..8001c583676
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsample5.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+from models.experimental.functional_yolov4.tt.ttnn_resblock import TtResBlock
+from ttnn.model_preprocessing import preprocess_model
+
+import ttnn
+import tt_lib
+
+
+class TtDownSample5:
+    def __init__(
+        self,
+        parameters,
+    ) -> None:
+        self.c1 = parameters.c1
+        self.c2 = parameters.c2
+        self.c3 = parameters.c3
+        self.res = TtResBlock(parameters.res, 4, True)
+        self.c4 = parameters.c4
+        self.c5 = parameters.c5
+
+    def __call__(self, device, input_tensor):
+        input_tensor = input_tensor.to(device, self.c1.conv.input_sharded_memory_config)
+        output_tensor = self.c1(input_tensor)
+        output_tensor_c1 = output_tensor
+        output_tensor = self.c2(output_tensor)
+        output_tensor_c2 = output_tensor
+        output_tensor = self.c3(output_tensor_c1)
+        output_tensor = self.res(device, output_tensor)
+        output_tensor = output_tensor.to(device, self.c4.conv.input_sharded_memory_config)
+        output_tensor = self.c4(output_tensor)
+
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        output_tensor = ttnn.concat([output_tensor, output_tensor_c2], dim=3)
+        output_tensor = output_tensor.to(device, self.c5.conv.input_sharded_memory_config)
+
+        output_tensor = self.c5(output_tensor)
+        output_tensor = tt_lib.tensor.sharded_to_interleaved(output_tensor, ttnn.L1_MEMORY_CONFIG)
+        output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.TILE_LAYOUT)
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_downsamples.py b/models/experimental/functional_yolov4/tt/ttnn_downsamples.py
new file mode 100644
index 00000000000..949219e80d1
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_downsamples.py
@@ -0,0 +1,25 @@
+from models.experimental.functional_yolov4.tt.ttnn_downsample1 import TtDownSample1
+from models.experimental.functional_yolov4.tt.ttnn_downsample2 import TtDownSample2
+from models.experimental.functional_yolov4.tt.ttnn_downsample3 import TtDownSample3
+from models.experimental.functional_yolov4.tt.ttnn_downsample4 import TtDownSample4
+from models.experimental.functional_yolov4.tt.ttnn_downsample5 import TtDownSample5
+
+import ttnn
+
+
+class TtDownSamples:
+    def __init__(self, parameters) -> None:
+        self.downsample1 = TtDownSample1(parameters["downsample1"])
+        self.downsample2 = TtDownSample2(parameters["downsample2"])
+        self.downsample3 = TtDownSample3(parameters["downsample3"])
+        self.downsample4 = TtDownSample4(parameters["downsample4"])
+        self.downsample5 = TtDownSample5(parameters["downsample5"])
+
+    def __call__(self, device, input_tensor):
+        output_tensor = self.downsample1(device, input_tensor)
+        output_tensor = self.downsample2(device, output_tensor)
+        output_tensor = self.downsample3(device, output_tensor)
+        output_tensor = self.downsample4(device, output_tensor)
+        output_tensor = self.downsample5(device, output_tensor)
+
+        return ttnn.from_device(output_tensor)
diff --git a/models/experimental/functional_yolov4/tt/ttnn_resblock.py b/models/experimental/functional_yolov4/tt/ttnn_resblock.py
new file mode 100644
index 00000000000..6ad0d955e89
--- /dev/null
+++ b/models/experimental/functional_yolov4/tt/ttnn_resblock.py
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import ttnn
+import tt_lib
+
+
+class TtResBlock:
+    def __init__(self, parameters, nblocks, shortcut) -> None:
+        self.shortcut = shortcut
+        self.nblocks = nblocks
+        self.module_list = []
+        for i in range(nblocks):
+            conv1 = parameters[f"resblock_{i}_conv1"]
+            conv2 = parameters[f"resblock_{i}_conv2"]
+            resblock_one = [conv1, conv2]
+            self.module_list.append(resblock_one)
+
+    def __call__(self, device, input_tensor):
+        input_tensor = tt_lib.tensor.sharded_to_interleaved(input_tensor, ttnn.L1_MEMORY_CONFIG)
+        input_tensor = ttnn.to_layout(input_tensor, layout=ttnn.TILE_LAYOUT)
+        for i in range(self.nblocks):
+            output_tensor_h = input_tensor
+            output_tensor_h = output_tensor_h.to(device, self.module_list[i][0].conv.input_sharded_memory_config)
+            output_tensor_1 = self.module_list[i][0](output_tensor_h)
+            output_tensor_h = self.module_list[i][1](output_tensor_1)
+            output_tensor_h = tt_lib.tensor.sharded_to_interleaved(output_tensor_h, ttnn.L1_MEMORY_CONFIG)
+            output_tensor_h = ttnn.to_layout(output_tensor_h, layout=ttnn.TILE_LAYOUT)
+
+            input_tensor = (input_tensor + output_tensor_h) if self.shortcut else output_tensor_h
+        return ttnn.from_device(input_tensor)
diff --git a/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d1.py b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d1.py
new file mode 100644
index 00000000000..4414615940f
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d1.py
@@ -0,0 +1,158 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+from models.experimental.functional_yolov4.reference.downsample1 import DownSample1
+from models.experimental.functional_yolov4.tt.ttnn_downsample1 import TtDownSample1
+
+import time
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+import ttnn
+
+
+def update_ttnn_module_args(ttnn_module_args):
+    ttnn_module_args["use_1d_systolic_array"] = ttnn_module_args.in_channels < 256
+
+
+def custom_preprocessor(device, model, name, ttnn_module_args):
+    parameters = {}
+    if isinstance(model, DownSample1):
+        ttnn_module_args.c1["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c1["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c1["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c1["activation"] = "relu"  # Fuse relu with conv1
+        ttnn_module_args.c1["deallocate_activation"] = True
+        ttnn_module_args.c1["conv_blocking_and_parallelization_config_override"] = None
+
+        conv1_weight, conv1_bias = fold_batch_norm2d_into_conv2d(model.c1, model.b1)
+        update_ttnn_module_args(ttnn_module_args.c1)
+        parameters["c1"], c1_parallel_config = preprocess_conv2d(
+            conv1_weight, conv1_bias, ttnn_module_args.c1, return_parallel_config=True
+        )
+
+        ttnn_module_args.c2["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c2["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c2["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c2["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c2["activation"] = "relu"  # Fuse relu with conv2
+        ttnn_module_args.c2["deallocate_activation"] = True
+        ttnn_module_args.c2["conv_blocking_and_parallelization_config_override"] = None
+
+        conv2_weight, conv2_bias = fold_batch_norm2d_into_conv2d(model.c2, model.b2)
+        update_ttnn_module_args(ttnn_module_args.c2)
+        parameters["c2"], c2_parallel_config = preprocess_conv2d(
+            conv2_weight, conv2_bias, ttnn_module_args.c2, return_parallel_config=True
+        )
+
+        ttnn_module_args.c3["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c3["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c3["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c3["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c3["activation"] = "relu"  # Fuse relu with conv3
+        ttnn_module_args.c3["deallocate_activation"] = True
+        ttnn_module_args.c3["conv_blocking_and_parallelization_config_override"] = None
+
+        conv3_weight, conv3_bias = fold_batch_norm2d_into_conv2d(model.c3, model.b3)
+        update_ttnn_module_args(ttnn_module_args.c3)
+        parameters["c3"], c3_parallel_config = preprocess_conv2d(
+            conv3_weight, conv3_bias, ttnn_module_args.c3, return_parallel_config=True
+        )
+
+        ttnn_module_args.c4["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c4["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c4["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c4["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c4["activation"] = "relu"  # Fuse relu with conv4
+        ttnn_module_args.c4["deallocate_activation"] = True
+        ttnn_module_args.c4["conv_blocking_and_parallelization_config_override"] = None
+
+        conv4_weight, conv4_bias = fold_batch_norm2d_into_conv2d(model.c4, model.b4)
+        update_ttnn_module_args(ttnn_module_args.c4)
+        parameters["c4"], c4_parallel_config = preprocess_conv2d(
+            conv4_weight, conv4_bias, ttnn_module_args.c4, return_parallel_config=True
+        )
+
+        ttnn_module_args.c5["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c5["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c5["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c5["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c5["activation"] = "relu"  # Fuse relu with conv5
+        ttnn_module_args.c5["deallocate_activation"] = True
+        ttnn_module_args.c5["conv_blocking_and_parallelization_config_override"] = None
+
+        conv5_weight, conv5_bias = fold_batch_norm2d_into_conv2d(model.c5, model.b5)
+        update_ttnn_module_args(ttnn_module_args.c5)
+        parameters["c5"], c5_parallel_config = preprocess_conv2d(
+            conv5_weight, conv5_bias, ttnn_module_args.c5, return_parallel_config=True
+        )
+
+        ttnn_module_args.c6["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c6["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c6["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c6["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c6["activation"] = "relu"  # Fuse relu with conv6
+        ttnn_module_args.c6["deallocate_activation"] = True
+        ttnn_module_args.c6["conv_blocking_and_parallelization_config_override"] = None
+
+        conv6_weight, conv6_bias = fold_batch_norm2d_into_conv2d(model.c6, model.b6)
+        update_ttnn_module_args(ttnn_module_args.c6)
+        parameters["c6"], c6_parallel_config = preprocess_conv2d(
+            conv6_weight, conv6_bias, ttnn_module_args.c6, return_parallel_config=True
+        )
+
+        ttnn_module_args.c7["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c7["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c7["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c7["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c7["activation"] = "relu"  # Fuse relu with conv7
+        ttnn_module_args.c7["deallocate_activation"] = True
+        ttnn_module_args.c7["conv_blocking_and_parallelization_config_override"] = None
+
+        conv7_weight, conv7_bias = fold_batch_norm2d_into_conv2d(model.c7, model.b7)
+        update_ttnn_module_args(ttnn_module_args.c7)
+        parameters["c7"], c7_parallel_config = preprocess_conv2d(
+            conv7_weight, conv7_bias, ttnn_module_args.c7, return_parallel_config=True
+        )
+
+        ttnn_module_args.c8["math_fidelity"] = ttnn.MathFidelity.LoFi
+        ttnn_module_args.c8["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c8["dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c8["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c8["activation"] = "relu"  # Fuse relu with conv8
+        ttnn_module_args.c8["deallocate_activation"] = True
+        ttnn_module_args.c8["conv_blocking_and_parallelization_config_override"] = None
+
+        conv8_weight, conv8_bias = fold_batch_norm2d_into_conv2d(model.c8, model.b8)
+        update_ttnn_module_args(ttnn_module_args.c8)
+        parameters["c8"], c8_parallel_config = preprocess_conv2d(
+            conv8_weight, conv8_bias, ttnn_module_args.c8, return_parallel_config=True
+        )
+
+    return parameters
diff --git a/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d2.py b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d2.py
new file mode 100644
index 00000000000..9a4f8dee527
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d2.py
@@ -0,0 +1,107 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+from models.experimental.functional_yolov4.reference.downsample2 import DownSample2
+from models.experimental.functional_yolov4.tt.ttnn_downsample2 import TtDownSample2
+
+import time
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+import ttnn
+
+
+def update_ttnn_module_args(ttnn_module_args):
+    ttnn_module_args["use_1d_systolic_array"] = ttnn_module_args.in_channels <= 256
+    ttnn_module_args["dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["weights_dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["deallocate_activation"] = True
+    ttnn_module_args["conv_blocking_and_parallelization_config_override"] = None
+    ttnn_module_args["activation"] = "relu"
+
+
+def custom_preprocessor(device, model, name, ttnn_module_args):
+    parameters = {}
+    if isinstance(model, DownSample2):
+        ttnn_module_args.c1["weights_dtype"] = ttnn.bfloat8_b
+        conv1_weight, conv1_bias = fold_batch_norm2d_into_conv2d(model.c1, model.b1)
+        update_ttnn_module_args(ttnn_module_args.c1)
+        parameters["c1"], c1_parallel_config = preprocess_conv2d(
+            conv1_weight, conv1_bias, ttnn_module_args.c1, return_parallel_config=True
+        )
+
+        ttnn_module_args.c2["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c2["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        conv2_weight, conv2_bias = fold_batch_norm2d_into_conv2d(model.c2, model.b2)
+        update_ttnn_module_args(ttnn_module_args.c2)
+        parameters["c2"], c2_parallel_config = preprocess_conv2d(
+            conv2_weight, conv2_bias, ttnn_module_args.c2, return_parallel_config=True
+        )
+
+        ttnn_module_args.c3["weights_dtype"] = ttnn.bfloat8_b
+        ttnn_module_args.c3["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        conv3_weight, conv3_bias = fold_batch_norm2d_into_conv2d(model.c3, model.b3)
+        update_ttnn_module_args(ttnn_module_args.c3)
+        parameters["c3"], c3_parallel_config = preprocess_conv2d(
+            conv3_weight, conv3_bias, ttnn_module_args.c3, return_parallel_config=True
+        )
+
+        parameters["res"] = {}
+        for i, block in enumerate(model.res.module_list):
+            conv1 = block[0]
+            bn1 = block[1]
+            conv2 = block[3]
+            bn2 = block[4]
+
+            ttnn_module_args["res"][f"resblock_{i}_conv1"] = ttnn_module_args["res"]["0"]
+            ttnn_module_args["res"][f"resblock_{i}_conv1"]["weights_dtype"] = ttnn.bfloat8_b
+            weight1, bias1 = fold_batch_norm2d_into_conv2d(conv1, bn1)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv1"])
+            parameters["res"][f"resblock_{i}_conv1"], _ = preprocess_conv2d(
+                weight1, bias1, ttnn_module_args["res"][f"resblock_{i}_conv1"], return_parallel_config=True
+            )
+
+            ttnn_module_args["res"][f"resblock_{i}_conv2"] = ttnn_module_args["res"]["3"]
+            ttnn_module_args["res"][f"resblock_{i}_conv2"]["weights_dtype"] = ttnn.bfloat8_b
+            weight2, bias2 = fold_batch_norm2d_into_conv2d(conv2, bn2)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv2"])
+            parameters["res"][f"resblock_{i}_conv2"], _ = preprocess_conv2d(
+                weight2, bias2, ttnn_module_args["res"][f"resblock_{i}_conv2"], return_parallel_config=True
+            )
+
+        ttnn_module_args.c4["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c4["weights_dtype"] = ttnn.bfloat8_b
+        conv4_weight, conv4_bias = fold_batch_norm2d_into_conv2d(model.c4, model.b4)
+        update_ttnn_module_args(ttnn_module_args.c4)
+        parameters["c4"], c4_parallel_config = preprocess_conv2d(
+            conv4_weight, conv4_bias, ttnn_module_args.c4, return_parallel_config=True
+        )
+
+        ttnn_module_args.c5["use_shallow_conv_variant"] = (
+            False if device.arch() == tt_lib.device.Arch.WORMHOLE_B0 else True
+        )
+        ttnn_module_args.c5["weights_dtype"] = ttnn.bfloat8_b
+        conv5_weight, conv5_bias = fold_batch_norm2d_into_conv2d(model.c5, model.b5)
+        update_ttnn_module_args(ttnn_module_args.c5)
+        parameters["c5"], c5_parallel_config = preprocess_conv2d(
+            conv5_weight, conv5_bias, ttnn_module_args.c5, return_parallel_config=True
+        )
+    return parameters
diff --git a/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d3.py b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d3.py
new file mode 100644
index 00000000000..c1d7fe4098c
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d3.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+from models.experimental.functional_yolov4.reference.downsample3 import DownSample3
+from models.experimental.functional_yolov4.tt.ttnn_downsample3 import TtDownSample3
+
+import time
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+import ttnn
+
+
+def update_ttnn_module_args(ttnn_module_args):
+    ttnn_module_args["use_1d_systolic_array"] = ttnn_module_args.in_channels <= 256
+    ttnn_module_args["math_fidelity"] = ttnn.MathFidelity.LoFi
+    ttnn_module_args["dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["weights_dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["deallocate_activation"] = True
+    ttnn_module_args["conv_blocking_and_parallelization_config_override"] = None
+    ttnn_module_args["activation"] = "relu"
+
+
+def custom_preprocessor(device, model, name, ttnn_module_args):
+    parameters = {}
+    if isinstance(model, DownSample3):
+        conv1_weight, conv1_bias = fold_batch_norm2d_into_conv2d(model.c1, model.b1)
+        update_ttnn_module_args(ttnn_module_args.c1)
+        parameters["c1"], c1_parallel_config = preprocess_conv2d(
+            conv1_weight, conv1_bias, ttnn_module_args.c1, return_parallel_config=True
+        )
+
+        ttnn_module_args.c2["use_shallow_conv_variant"] = False
+        ttnn_module_args.c2["weights_dtype"] = ttnn.bfloat8_b
+        conv2_weight, conv2_bias = fold_batch_norm2d_into_conv2d(model.c2, model.b2)
+        update_ttnn_module_args(ttnn_module_args.c2)
+        parameters["c2"], c2_parallel_config = preprocess_conv2d(
+            conv2_weight, conv2_bias, ttnn_module_args.c2, return_parallel_config=True
+        )
+
+        ttnn_module_args.c3["use_shallow_conv_variant"] = False
+        ttnn_module_args.c3["weights_dtype"] = ttnn.bfloat8_b
+        conv3_weight, conv3_bias = fold_batch_norm2d_into_conv2d(model.c3, model.b3)
+        update_ttnn_module_args(ttnn_module_args.c3)
+        parameters["c3"], c3_parallel_config = preprocess_conv2d(
+            conv3_weight, conv3_bias, ttnn_module_args.c3, return_parallel_config=True
+        )
+
+        parameters["res"] = {}
+        for i, block in enumerate(model.res.module_list):
+            conv1 = block[0]
+            bn1 = block[1]
+            conv2 = block[3]
+            bn2 = block[4]
+
+            ttnn_module_args["res"][f"resblock_{i}_conv1"] = ttnn_module_args["res"]["0"]
+            ttnn_module_args["res"][f"resblock_{i}_conv1"]["weights_dtype"] = ttnn.bfloat8_b
+            weight1, bias1 = fold_batch_norm2d_into_conv2d(conv1, bn1)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv1"])
+            parameters["res"][f"resblock_{i}_conv1"], _ = preprocess_conv2d(
+                weight1, bias1, ttnn_module_args["res"][f"resblock_{i}_conv1"], return_parallel_config=True
+            )
+
+            ttnn_module_args["res"][f"resblock_{i}_conv2"] = ttnn_module_args["res"]["3"]
+            ttnn_module_args["res"][f"resblock_{i}_conv2"]["weights_dtype"] = ttnn.bfloat8_b
+            weight2, bias2 = fold_batch_norm2d_into_conv2d(conv2, bn2)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv2"])
+            parameters["res"][f"resblock_{i}_conv2"], _ = preprocess_conv2d(
+                weight2, bias2, ttnn_module_args["res"][f"resblock_{i}_conv2"], return_parallel_config=True
+            )
+
+        ttnn_module_args.c4["use_shallow_conv_variant"] = False
+        ttnn_module_args.c4["weights_dtype"] = ttnn.bfloat8_b
+        conv4_weight, conv4_bias = fold_batch_norm2d_into_conv2d(model.c4, model.b4)
+        update_ttnn_module_args(ttnn_module_args.c4)
+        parameters["c4"], c4_parallel_config = preprocess_conv2d(
+            conv4_weight, conv4_bias, ttnn_module_args.c4, return_parallel_config=True
+        )
+
+        ttnn_module_args.c5["use_shallow_conv_variant"] = False
+        ttnn_module_args.c5["weights_dtype"] = ttnn.bfloat8_b
+        conv5_weight, conv5_bias = fold_batch_norm2d_into_conv2d(model.c5, model.b5)
+        update_ttnn_module_args(ttnn_module_args.c5)
+        parameters["c5"], c5_parallel_config = preprocess_conv2d(
+            conv5_weight, conv5_bias, ttnn_module_args.c5, return_parallel_config=True
+        )
+
+    return parameters
diff --git a/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d4.py b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d4.py
new file mode 100644
index 00000000000..08c9f7b8dfd
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d4.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+from models.experimental.functional_yolov4.reference.downsample4 import DownSample4
+from models.experimental.functional_yolov4.tt.ttnn_downsample4 import TtDownSample4
+
+import time
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+
+def update_ttnn_module_args(ttnn_module_args):
+    ttnn_module_args["use_1d_systolic_array"] = False  # ttnn_module_args.in_channels <= 256
+    ttnn_module_args["math_fidelity"] = ttnn.MathFidelity.LoFi
+    ttnn_module_args["dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["weights_dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["deallocate_activation"] = True
+    ttnn_module_args["conv_blocking_and_parallelization_config_override"] = None
+    ttnn_module_args["activation"] = "relu"
+
+
+def custom_preprocessor(device, model, name, ttnn_module_args):
+    parameters = {}
+    if isinstance(model, DownSample4):
+        ttnn_module_args.c1["use_shallow_conv_variant"] = False
+        conv1_weight, conv1_bias = fold_batch_norm2d_into_conv2d(model.c1, model.b1)
+        update_ttnn_module_args(ttnn_module_args.c1)
+        parameters["c1"], c1_parallel_config = preprocess_conv2d(
+            conv1_weight, conv1_bias, ttnn_module_args.c1, return_parallel_config=True
+        )
+
+        ttnn_module_args.c2["use_shallow_conv_variant"] = False
+        ttnn_module_args.c2["weights_dtype"] = ttnn.bfloat8_b
+        conv2_weight, conv2_bias = fold_batch_norm2d_into_conv2d(model.c2, model.b2)
+        update_ttnn_module_args(ttnn_module_args.c2)
+        parameters["c2"], c2_parallel_config = preprocess_conv2d(
+            conv2_weight, conv2_bias, ttnn_module_args.c2, return_parallel_config=True
+        )
+
+        ttnn_module_args.c3["use_shallow_conv_variant"] = False
+        ttnn_module_args.c3["weights_dtype"] = ttnn.bfloat8_b
+        conv3_weight, conv3_bias = fold_batch_norm2d_into_conv2d(model.c3, model.b3)
+        update_ttnn_module_args(ttnn_module_args.c3)
+        parameters["c3"], c3_parallel_config = preprocess_conv2d(
+            conv3_weight, conv3_bias, ttnn_module_args.c3, return_parallel_config=True
+        )
+
+        parameters["res"] = {}
+        for i, block in enumerate(model.res.module_list):
+            conv1 = block[0]
+            bn1 = block[1]
+            conv2 = block[3]
+            bn2 = block[4]
+
+            ttnn_module_args["res"][f"resblock_{i}_conv1"] = ttnn_module_args["res"]["0"]
+            ttnn_module_args["res"][f"resblock_{i}_conv1"]["weights_dtype"] = ttnn.bfloat8_b
+            weight1, bias1 = fold_batch_norm2d_into_conv2d(conv1, bn1)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv1"])
+            parameters["res"][f"resblock_{i}_conv1"], _ = preprocess_conv2d(
+                weight1, bias1, ttnn_module_args["res"][f"resblock_{i}_conv1"], return_parallel_config=True
+            )
+
+            ttnn_module_args["res"][f"resblock_{i}_conv2"] = ttnn_module_args["res"]["3"]
+            ttnn_module_args["res"][f"resblock_{i}_conv2"]["weights_dtype"] = ttnn.bfloat8_b
+            weight2, bias2 = fold_batch_norm2d_into_conv2d(conv2, bn2)
+            update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv2"])
+            parameters["res"][f"resblock_{i}_conv2"], _ = preprocess_conv2d(
+                weight2, bias2, ttnn_module_args["res"][f"resblock_{i}_conv2"], return_parallel_config=True
+            )
+
+        ttnn_module_args.c4["use_shallow_conv_variant"] = False
+        ttnn_module_args.c4["weights_dtype"] = ttnn.bfloat8_b
+        conv4_weight, conv4_bias = fold_batch_norm2d_into_conv2d(model.c4, model.b4)
+        update_ttnn_module_args(ttnn_module_args.c4)
+        parameters["c4"], c4_parallel_config = preprocess_conv2d(
+            conv4_weight, conv4_bias, ttnn_module_args.c4, return_parallel_config=True
+        )
+
+        ttnn_module_args.c5["use_shallow_conv_variant"] = False
+        ttnn_module_args.c5["weights_dtype"] = ttnn.bfloat8_b
+        conv5_weight, conv5_bias = fold_batch_norm2d_into_conv2d(model.c5, model.b5)
+        update_ttnn_module_args(ttnn_module_args.c5)
+        parameters["c5"], c5_parallel_config = preprocess_conv2d(
+            conv5_weight, conv5_bias, ttnn_module_args.c5, return_parallel_config=True
+        )
+
+    return parameters
diff --git a/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d5.py b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d5.py
new file mode 100644
index 00000000000..68a1c3b908b
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov4/custom_preprocessor_d5.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+from models.experimental.functional_yolov4.reference.downsample5 import DownSample5
+from models.experimental.functional_yolov4.tt.ttnn_downsample5 import TtDownSample5
+
+import time
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+
+def update_ttnn_module_args(ttnn_module_args):
+    ttnn_module_args["use_1d_systolic_array"] = False  # ttnn_module_args.in_channels <= 256
+    ttnn_module_args["math_fidelity"] = ttnn.MathFidelity.LoFi
+    ttnn_module_args["dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["weights_dtype"] = ttnn.bfloat8_b
+    ttnn_module_args["deallocate_activation"] = True
+    ttnn_module_args["conv_blocking_and_parallelization_config_override"] = None
+    ttnn_module_args["activation"] = "relu"
+
+
+def custom_preprocessor(device, model, name, ttnn_module_args):
+    parameters = {}
+    if isinstance(model, DownSample5):
+        ttnn_module_args.c1["use_shallow_conv_variant"] = False
+        conv1_weight, conv1_bias = fold_batch_norm2d_into_conv2d(model.c1, model.b1)
+        update_ttnn_module_args(ttnn_module_args.c1)
+        parameters["c1"], c1_parallel_config = preprocess_conv2d(
+            conv1_weight, conv1_bias, ttnn_module_args.c1, return_parallel_config=True
+        )
+
+        ttnn_module_args.c2["use_shallow_conv_variant"] = False
+        ttnn_module_args.c2["weights_dtype"] = ttnn.bfloat8_b
+        conv2_weight, conv2_bias = fold_batch_norm2d_into_conv2d(model.c2, model.b2)
+        update_ttnn_module_args(ttnn_module_args.c2)
+        parameters["c2"], c2_parallel_config = preprocess_conv2d(
+            conv2_weight, conv2_bias, ttnn_module_args.c2, return_parallel_config=True
+        )
+
+        ttnn_module_args.c3["use_shallow_conv_variant"] = False
+        ttnn_module_args.c3["weights_dtype"] = ttnn.bfloat8_b
+        conv3_weight, conv3_bias = fold_batch_norm2d_into_conv2d(model.c3, model.b3)
+        update_ttnn_module_args(ttnn_module_args.c3)
+        parameters["c3"], c3_parallel_config = preprocess_conv2d(
+            conv3_weight, conv3_bias, ttnn_module_args.c3, return_parallel_config=True
+        )
+
+        parameters["res"] = {}
+        for i, block in enumerate(model.res.module_list):
+            conv1 = block[0]
+            bn1 = block[1]
+            conv2 = block[3]
+            bn2 = block[4]
+
ttnn_module_args["res"][f"resblock_{i}_conv1"] = ttnn_module_args["res"]["0"] + ttnn_module_args["res"][f"resblock_{i}_conv1"]["weights_dtype"] = ttnn.bfloat8_b + weight1, bias1 = fold_batch_norm2d_into_conv2d(conv1, bn1) + update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv1"]) + parameters["res"][f"resblock_{i}_conv1"], _ = preprocess_conv2d( + weight1, bias1, ttnn_module_args["res"][f"resblock_{i}_conv1"], return_parallel_config=True + ) + + ttnn_module_args["res"][f"resblock_{i}_conv2"] = ttnn_module_args["res"]["3"] + ttnn_module_args["res"][f"resblock_{i}_conv2"]["weights_dtype"] = ttnn.bfloat8_b + weight2, bias2 = fold_batch_norm2d_into_conv2d(conv2, bn2) + update_ttnn_module_args(ttnn_module_args["res"][f"resblock_{i}_conv2"]) + parameters["res"][f"resblock_{i}_conv2"], _ = preprocess_conv2d( + weight2, bias2, ttnn_module_args["res"][f"resblock_{i}_conv2"], return_parallel_config=True + ) + + ttnn_module_args.c4["use_shallow_conv_variant"] = False + ttnn_module_args.c4["weights_dtype"] = ttnn.bfloat8_b + conv4_weight, conv4_bias = fold_batch_norm2d_into_conv2d(model.c4, model.b4) + update_ttnn_module_args(ttnn_module_args.c4) + parameters["c4"], c4_parallel_config = preprocess_conv2d( + conv4_weight, conv4_bias, ttnn_module_args.c4, return_parallel_config=True + ) + + ttnn_module_args.c5["use_shallow_conv_variant"] = False + ttnn_module_args.c5["weights_dtype"] = ttnn.bfloat8_b + conv5_weight, conv5_bias = fold_batch_norm2d_into_conv2d(model.c5, model.b5) + update_ttnn_module_args(ttnn_module_args.c5) + parameters["c5"], c5_parallel_config = preprocess_conv2d( + conv5_weight, conv5_bias, ttnn_module_args.c5, return_parallel_config=True + ) + + return parameters diff --git a/tests/ttnn/integration_tests/yolov4/test_ttnn_downsamples.py b/tests/ttnn/integration_tests/yolov4/test_ttnn_downsamples.py new file mode 100644 index 00000000000..ff9a142aaa2 --- /dev/null +++ b/tests/ttnn/integration_tests/yolov4/test_ttnn_downsamples.py @@ -0,0 +1,109 @@ +# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+import torch
+import torch.nn as nn
+
+from ttnn.model_preprocessing import preprocess_model, preprocess_conv2d, fold_batch_norm2d_into_conv2d
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.utility_functions import skip_for_wormhole_b0
+import tt_lib as ttl
+import tt_lib.profiler as profiler
+
+from models.experimental.functional_yolov4.reference.downsamples import DownSamples
+from models.experimental.functional_yolov4.tt.ttnn_downsamples import TtDownSamples
+
+import tests.ttnn.integration_tests.yolov4.custom_preprocessor_d1 as D1
+import tests.ttnn.integration_tests.yolov4.custom_preprocessor_d2 as D2
+import tests.ttnn.integration_tests.yolov4.custom_preprocessor_d3 as D3
+import tests.ttnn.integration_tests.yolov4.custom_preprocessor_d4 as D4
+import tests.ttnn.integration_tests.yolov4.custom_preprocessor_d5 as D5
+
+import ttnn
+import tt_lib
+from ttnn.model_preprocessing import preprocess_conv2d, fold_batch_norm2d_into_conv2d
+import ttnn
+
+
+def create_custom_preprocessor(device):
+    def custom_preprocessor(model, name, ttnn_module_args):
+        parameters = {}
+        parameters["downsample1"] = D1.custom_preprocessor(
+            device, model.downsample1, name, ttnn_module_args["downsample1"]
+        )
+        parameters["downsample2"] = D2.custom_preprocessor(
+            device, model.downsample2, name, ttnn_module_args["downsample2"]
+        )
+        parameters["downsample3"] = D3.custom_preprocessor(
+            device, model.downsample3, name, ttnn_module_args["downsample3"]
+        )
+        parameters["downsample4"] = D4.custom_preprocessor(
+            device, model.downsample4, name, ttnn_module_args["downsample4"]
+        )
+        parameters["downsample5"] = D5.custom_preprocessor(
+            device, model.downsample5, name, ttnn_module_args["downsample5"]
+        )
+        return parameters
+
+    return custom_preprocessor
+
+
+@skip_for_wormhole_b0()
+def test_downsamples(device, reset_seeds):
+    state_dict = torch.load("tests/ttnn/integration_tests/yolov4/yolov4.pth")
+    ds_state_dict = {
+        k: v for k, v in state_dict.items() if (k.startswith(("down1.", "down2.", "down3.", "down4.", "down5.")))
+    }
+    torch_model = DownSamples()
+
+    for layer in torch_model.children():
+        print(layer)
+
+    new_state_dict = {}
+    keys = [name for name, parameter in torch_model.state_dict().items()]
+    values = [parameter for name, parameter in ds_state_dict.items()]
+
+    for i in range(len(keys)):
+        new_state_dict[keys[i]] = values[i]
+
+    torch_model.load_state_dict(new_state_dict)
+    torch_model.eval()
+
+    torch_input_tensor = torch.randn(1, 3, 320, 320)  # Batch size of 1, 3 input channels, 320x320 height and width
+    torch_output_tensor = torch_model(torch_input_tensor)
+
+    reader_patterns_cache = {}
+    parameters = preprocess_model(
+        initialize_model=lambda: torch_model,
+        run_model=lambda model: model(torch_input_tensor),
+        custom_preprocessor=create_custom_preprocessor(device),
+        reader_patterns_cache=reader_patterns_cache,
+        device=device,
+    )
+    ttnn_model = TtDownSamples(parameters)
+
+    # Tensor Preprocessing
+    #
+    input_shape = torch_input_tensor.shape
+    input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
+
+    input_tensor = input_tensor.reshape(
+        input_tensor.shape[0], 1, input_tensor.shape[1] * input_tensor.shape[2], input_tensor.shape[3]
+    )
+    input_tensor = ttnn.from_torch(input_tensor, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT)
+    # output_tensor = ttnn_model(device, input_tensor)
+    with ttnn.tracer.trace():
+        output_tensor = ttnn_model(device, input_tensor)
+    ttnn.tracer.visualize(output_tensor, file_name="downsamples.svg")
file_name="downsamples.svg") + # + # Tensor Postprocessing + # + output_tensor = ttnn.to_torch(output_tensor) + output_tensor = output_tensor.reshape(1, 10, 10, 1024) + output_tensor = torch.permute(output_tensor, (0, 3, 1, 2)) + output_tensor = output_tensor.to(torch_input_tensor.dtype) + + assert_with_pcc(torch_output_tensor, output_tensor, pcc=0.99)