tensorflow: Merge pull request #58788 from trevor-m:tmorris-bf16-spacedepth

Commit: 95eb6e503c6f30d593fc2803f19c62e1a3e60d23
TensorFlower Gardener authored and sourcegraph-bot committed Dec 27, 2022
1 parent eed08bc commit ac79dc4
Showing 11 changed files with 54 additions and 27 deletions.
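
In summary, this commit registers bfloat16 GPU kernels for the BatchToSpace, SpaceToBatch, DepthToSpace, and SpaceToDepth ops and extends their Python tests to cover the new dtype. A minimal usage sketch of what the change enables (not part of the commit; assumes a CUDA or ROCm TensorFlow build containing this change and a visible GPU):

import tensorflow as tf

# A bfloat16 NHWC tensor: 1 batch, 2x2 spatial, 4 channels.
x = tf.cast(tf.reshape(tf.range(16.0), [1, 2, 2, 4]), tf.bfloat16)

with tf.device("/GPU:0"):
    # Before this commit, no bfloat16 GPU kernel was registered for this op.
    y = tf.nn.depth_to_space(x, block_size=2)

print(y.shape, y.dtype)  # (1, 4, 4, 1) <dtype: 'bfloat16'>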
1 change: 1 addition & 0 deletions tensorflow/tensorflow/core/kernels/batchtospace_op.cc
@@ -284,6 +284,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                           BatchToSpaceOp<GPUDevice, T>);
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER);
+TF_CALL_bfloat16(REGISTER);
 #undef REGISTER
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
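
The new TF_CALL_bfloat16(REGISTER) line expands the existing REGISTER macro once more, for Eigen::bfloat16, alongside the types already covered by TF_CALL_GPU_NUMBER_TYPES. A hedged sketch of the effect at the Python level (assumes a GPU build with this change; tf.batch_to_space lowers to the ND variant that, to our understanding, is registered by the same macro in this file):

import tensorflow as tf

x = tf.cast(tf.reshape(tf.range(4.0), [4, 1, 1, 1]), tf.bfloat16)
with tf.device("/GPU:0"):
    # BatchToSpace: fold the batch of 4 back into a 2x2 spatial grid.
    y = tf.batch_to_space(x, block_shape=[2, 2], crops=[[0, 0], [0, 0]])
print(y.shape)  # (1, 2, 2, 1)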
4 changes: 4 additions & 0 deletions tensorflow/tensorflow/core/kernels/depthtospace_op.cc
@@ -192,6 +192,10 @@ REGISTER_KERNEL_BUILDER(
 REGISTER_KERNEL_BUILDER(
     Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     DepthToSpaceOp<GPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(Name("DepthToSpace")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<Eigen::bfloat16>("T"),
+                        DepthToSpaceOp<GPUDevice, Eigen::bfloat16>);
 REGISTER_KERNEL_BUILDER(
     Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
     DepthToSpaceOp<GPUDevice, qint8>);
6 changes: 6 additions & 0 deletions tensorflow/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
@@ -248,6 +248,12 @@ template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half,
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half,
                                                FORMAT_NHWC>;
 
+// Instantiate the GPU implementations for Eigen::bfloat16.
+template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::bfloat16,
+                                               FORMAT_NCHW>;
+template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::bfloat16,
+                                               FORMAT_NHWC>;
+
 // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
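
Both FORMAT_NCHW and FORMAT_NHWC functors are instantiated because the GPU op accepts either data layout. An illustrative sketch of the NCHW path (assumption: GPU build with this change; NCHW is only supported on GPU for this op):

import tensorflow as tf

# NCHW layout: 1 batch, 4 channels, 2x2 spatial.
x = tf.cast(tf.reshape(tf.range(16.0), [1, 4, 2, 2]), tf.bfloat16)
with tf.device("/GPU:0"):
    y = tf.nn.depth_to_space(x, block_size=2, data_format="NCHW")
print(y.shape)  # (1, 1, 4, 4)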
@@ -158,7 +158,8 @@ struct SpaceToBatchFunctor<GPUDevice, T, NUM_BLOCK_DIMS, B2S> {
 #define INSTANTIATE_FOR_T(T) \
   TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)
 
-TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T)
+TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T);
+TF_CALL_bfloat16(INSTANTIATE_FOR_T);
 
 #undef INSTANTIATE_FOR_T
 #undef INSTANTIATE
1 change: 1 addition & 0 deletions tensorflow/tensorflow/core/kernels/spacetobatch_op.cc
@@ -296,6 +296,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                           SpaceToBatchOp<GPUDevice, T>);
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER);
+TF_CALL_bfloat16(REGISTER);
 #undef REGISTER
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 4 additions & 0 deletions tensorflow/tensorflow/core/kernels/spacetodepth_op.cc
@@ -209,6 +209,10 @@ REGISTER_KERNEL_BUILDER(
 REGISTER_KERNEL_BUILDER(
     Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     SpaceToDepthOp<GPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(Name("SpaceToDepth")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<Eigen::bfloat16>("T"),
+                        SpaceToDepthOp<GPUDevice, Eigen::bfloat16>);
 REGISTER_KERNEL_BUILDER(
     Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
     SpaceToDepthOp<GPUDevice, qint8>);
6 changes: 6 additions & 0 deletions tensorflow/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc
@@ -245,6 +245,12 @@ template struct functor::SpaceToDepthOpFunctor<GPUDevice, Eigen::half,
 template struct functor::SpaceToDepthOpFunctor<GPUDevice, Eigen::half,
                                                FORMAT_NHWC>;
 
+// Instantiate the GPU implementations for Eigen::bfloat16.
+template struct functor::SpaceToDepthOpFunctor<GPUDevice, Eigen::bfloat16,
+                                               FORMAT_NCHW>;
+template struct functor::SpaceToDepthOpFunctor<GPUDevice, Eigen::bfloat16,
+                                               FORMAT_NHWC>;
+
 // Instantiate the GPU implementations for uint8.
 template struct functor::SpaceToDepthOpFunctor<GPUDevice, uint8, FORMAT_NCHW>;
 template struct functor::SpaceToDepthOpFunctor<GPUDevice, uint8, FORMAT_NHWC>;
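
SpaceToDepth is the inverse of DepthToSpace for the same block size, which is why the two ops gain bfloat16 support together. A round-trip sketch (illustrative, not from the commit; both ops only rearrange data, so the check is exact):

import tensorflow as tf

x = tf.cast(tf.reshape(tf.range(16.0), [1, 4, 4, 1]), tf.bfloat16)
y = tf.nn.space_to_depth(x, block_size=2)       # shape (1, 2, 2, 4)
x_back = tf.nn.depth_to_space(y, block_size=2)  # shape (1, 4, 4, 1)
assert bool(tf.reduce_all(x_back == x))         # lossless rearrangement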
@@ -18,6 +18,7 @@
 op is tested in tandem with its reverse SpaceToBatch op.
 """
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -44,12 +45,14 @@ def batch_to_space(*args, **kwargs):
     return gen_array_ops.batch_to_space(*args, **kwargs)
 
 
-class BatchToSpaceDepthToSpace(test.TestCase, PythonOpImpl):
+class BatchToSpaceDepthToSpace(test.TestCase, parameterized.TestCase,
+                               PythonOpImpl):
 
   # Verifies that: batch_to_space(x) = transpose(depth_to_space(transpose(x)))
+  @parameterized.parameters(np.float32, dtypes.bfloat16.as_numpy_dtype)
   @test_util.run_deprecated_v1
-  def testDepthToSpaceTranspose(self):
-    x = np.arange(20 * 5 * 8 * 7, dtype=np.float32).reshape([20, 5, 8, 7])
+  def testDepthToSpaceTranspose(self, dtype):
+    x = np.arange(20 * 5 * 8 * 7, dtype=dtype).reshape([20, 5, 8, 7])
     block_size = 2
     for crops_dtype in [dtypes.int64, dtypes.int32]:
       crops = array_ops.zeros((2, 2), dtype=crops_dtype)
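
The identity this test exercises, restated as a standalone sketch with the public TF2 API rather than the test's own wrappers (hedged: the transpose permutation shown is an assumption consistent with the shapes in the test):

import numpy as np
import tensorflow as tf

x = np.arange(20 * 5 * 8 * 7, dtype=np.float32).reshape([20, 5, 8, 7])
block_size = 2
crops = np.zeros((2, 2), dtype=np.int32)

# batch_to_space(x) == transpose(depth_to_space(transpose(x))) for zero crops.
b2s = tf.batch_to_space(x, [block_size, block_size], crops)
d2s = tf.transpose(
    tf.nn.depth_to_space(tf.transpose(x, [3, 1, 2, 0]), block_size),
    [3, 1, 2, 0])
np.testing.assert_array_equal(b2s.numpy(), d2s.numpy())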
@@ -15,6 +15,7 @@
 
 """Functional tests for DepthToSpace op."""
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.client import device_lib
@@ -31,7 +32,7 @@
 from tensorflow.python.platform import tf_logging
 
 
-class DepthToSpaceTest(test.TestCase):
+class DepthToSpaceTest(test.TestCase, parameterized.TestCase):
 
   def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
     input_nhwc = math_ops.cast(inputs, dtype)
@@ -63,19 +64,13 @@ def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
     output_nhwc = test_util.NCHWToNHWC(output_nchw)
     self.assertAllEqual(output_nhwc, outputs)
 
+  @parameterized.parameters(dtypes.float32, dtypes.float16, dtypes.bfloat16)
   @test_util.run_deprecated_v1
-  def testBasic(self):
+  def testBasic(self, dtype):
     x_np = [[[[1, 2, 3, 4]]]]
     block_size = 2
     x_out = [[[[1], [2]], [[3], [4]]]]
-    self._testOne(x_np, block_size, x_out)
-
-  @test_util.run_deprecated_v1
-  def testBasicFloat16(self):
-    x_np = [[[[1, 2, 3, 4]]]]
-    block_size = 2
-    x_out = [[[[1], [2]], [[3], [4]]]]
-    self._testOne(x_np, block_size, x_out, dtype=dtypes.float16)
+    self._testOne(x_np, block_size, x_out, dtype)
 
   # Tests for larger input dimensions. To make sure elements are
   # correctly ordered spatially.
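
This hunk replaces a duplicated float16 test with absl's parameterized pattern, which generates one test case per dtype. A minimal sketch of the same pattern (hypothetical standalone test, not this file's exact code):

from absl.testing import parameterized
import tensorflow as tf


class DepthToSpaceSketchTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(tf.float32, tf.float16, tf.bfloat16)
  def testBasic(self, dtype):
    x = tf.cast([[[[1, 2, 3, 4]]]], dtype)
    y = tf.nn.depth_to_space(x, block_size=2)
    self.assertAllEqual(tf.cast(y, tf.float32), [[[[1.], [2.]], [[3.], [4.]]]])


if __name__ == "__main__":
  tf.test.main()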
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Functional tests for SpaceToBatch and BatchToSpace ops."""
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -92,38 +93,41 @@ def batch_to_space(*args, **kwargs):
     return gen_array_ops.batch_to_space(*args, **kwargs)
 
 
-class SpaceToBatchTest(test.TestCase, PythonOpImpl):
+class SpaceToBatchTest(test.TestCase, parameterized.TestCase, PythonOpImpl):
   """Tests input-output pairs for the SpaceToBatch and BatchToSpace ops.
 
   This uses the Python compatibility wrapper that forwards to space_to_batch_nd.
   """
 
-  def _testPad(self, inputs, paddings, block_size, outputs):
+  def _testPad(self,
+               inputs,
+               paddings,
+               block_size,
+               outputs,
+               dtype=dtypes.float32):
     with self.cached_session():
       # outputs = space_to_batch(inputs)
       x_tf = self.space_to_batch(
-          math_ops.cast(inputs, dtypes.float32),
-          paddings,
-          block_size=block_size)
+          math_ops.cast(inputs, dtype), paddings, block_size=block_size)
       self.assertAllEqual(x_tf, outputs)
       # inputs = batch_to_space(outputs)
       x_tf = self.batch_to_space(
-          math_ops.cast(outputs, dtypes.float32),
-          paddings,
-          block_size=block_size)
+          math_ops.cast(outputs, dtype), paddings, block_size=block_size)
       self.assertAllEqual(x_tf, inputs)
 
-  def _testOne(self, inputs, block_size, outputs):
+  def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
     paddings = np.zeros((2, 2), dtype=np.int32)
-    self._testPad(inputs, paddings, block_size, outputs)
+    self._testPad(inputs, paddings, block_size, outputs, dtype)
 
   # [1, 2, 2, 1] <-> [4, 1, 1, 1]
+  @parameterized.parameters(dtypes.float32, dtypes.float16, dtypes.bfloat16,
+                            dtypes.uint8)
   @test_util.run_deprecated_v1
-  def testSmallInput2x2(self):
+  def testSmallInput2x2(self, dtype):
     x_np = [[[[1], [2]], [[3], [4]]]]
     block_size = 2
     x_out = [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-    self._testOne(x_np, block_size, x_out)
+    self._testOne(x_np, block_size, x_out, dtype)
 
   # [1, 2, 2, 1] <-> [1, 3, 3, 1] (padding) <-> [9, 1, 1, 1]
   @test_util.run_deprecated_v1
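
_testPad checks both directions: space_to_batch(inputs) must equal outputs, and batch_to_space(outputs) must recover inputs. A hedged standalone sketch of that round trip in bfloat16 (public TF2 API; the test itself goes through a compatibility wrapper):

import numpy as np
import tensorflow as tf

x = tf.constant([[[[1], [2]], [[3], [4]]]], dtype=tf.bfloat16)  # (1, 2, 2, 1)
paddings = np.zeros((2, 2), dtype=np.int32)

y = tf.space_to_batch(x, block_shape=[2, 2], paddings=paddings)   # (4, 1, 1, 1)
x_back = tf.batch_to_space(y, block_shape=[2, 2], crops=paddings)  # (1, 2, 2, 1)
assert bool(tf.reduce_all(x_back == x))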
@@ -50,7 +50,9 @@ def testBasic(self):
     x_np = [[[[1], [2]], [[3], [4]]]]
     block_size = 2
     x_out = [[[[1, 2, 3, 4]]]]
-    for dtype in [dtypes.float32, dtypes.float16, dtypes.uint8]:
+    for dtype in [
+        dtypes.float32, dtypes.float16, dtypes.bfloat16, dtypes.uint8
+    ]:
       self._testOne(x_np, block_size, x_out, dtype=dtype)
 
