[MPS] Fix MPS clamp issue with different dtypes between input and min/max tensors #105747

Closed · wants to merge 5 commits
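Summary of the issue: on MPS, torch.clamp mishandled min/max bound tensors whose dtype differs from the input tensor's dtype; this PR casts the bounds to the input dtype before building the clamp graph. Below is a minimal reproduction sketch, mirroring the test_clamp_fp16_fp32 test added in this PR (it assumes a PyTorch build where torch.backends.mps.is_available() returns True):

import torch

# float32 input with float16 bounds: the dtypes intentionally differ.
cpu_x = torch.randn(10, dtype=torch.float32)
x = cpu_x.to("mps")
min_vals = torch.ones(10, device="mps", dtype=torch.float16)
max_vals = torch.ones(10, device="mps", dtype=torch.float16) * 10

# Before this fix, the MPS path mishandled the dtype mismatch; with it,
# the result matches the CPU reference.
mps_result = torch.clamp(x, min_vals, max_vals)
cpu_result = torch.clamp(cpu_x, min_vals.cpu(), max_vals.cpu())
print(torch.allclose(mps_result.cpu(), cpu_result))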
aten/src/ATen/native/mps/operations/TensorCompare.mm (32 additions, 10 deletions)
@@ -23,23 +23,43 @@
   MPSGraphTensor *minTensor = nil, *maxTensor = nil;
 };
 
-void clamp_mps_graph(CachedGraph* cachedGraph, const Tensor& input_tensor) {
+void clamp_mps_graph(CachedGraph* cachedGraph,
+                     const Tensor& input_tensor,
+                     const Tensor& min_tensor,
+                     const Tensor& max_tensor) {
+  auto input_dtype = input_tensor.scalar_type();
+  auto min_dtype = input_dtype;
+  auto max_dtype = input_dtype;
+  if (cachedGraph->minTensor) {
+    min_dtype = min_tensor.scalar_type();
+  }
+  if (cachedGraph->maxTensor) {
+    max_dtype = max_tensor.scalar_type();
+  }
   MPSGraph* mpsGraph = cachedGraph->graph();
 
   cachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, input_tensor);
 
+  MPSGraphTensor* minTensor = cachedGraph->minTensor;
+  MPSGraphTensor* maxTensor = cachedGraph->maxTensor;
+  if (input_dtype != min_dtype) {
+    minTensor = castMPSTensor(mpsGraph, cachedGraph->minTensor, input_dtype);
+  }
+  if (input_dtype != max_dtype) {
+    maxTensor = castMPSTensor(mpsGraph, cachedGraph->maxTensor, input_dtype);
+  }
   if (cachedGraph->minTensor && cachedGraph->maxTensor) {
     cachedGraph->outputTensor = [mpsGraph clampWithTensor:cachedGraph->inputTensor
-                                           minValueTensor:cachedGraph->minTensor
-                                           maxValueTensor:cachedGraph->maxTensor
+                                           minValueTensor:minTensor
+                                           maxValueTensor:maxTensor
                                                      name:nil];
   } else if (cachedGraph->maxTensor) {
     cachedGraph->outputTensor = [mpsGraph minimumWithPrimaryTensor:cachedGraph->inputTensor
-                                                   secondaryTensor:cachedGraph->maxTensor
+                                                   secondaryTensor:maxTensor
                                                               name:nil];
   } else if (cachedGraph->minTensor) {
     cachedGraph->outputTensor = [mpsGraph maximumWithPrimaryTensor:cachedGraph->inputTensor
-                                                   secondaryTensor:cachedGraph->minTensor
+                                                   secondaryTensor:minTensor
                                                               name:nil];
   }
 }
@@ -134,14 +154,16 @@ void clamp_tensor_out_mps(const Tensor& input_t,
 
     string key = op_name + (has_min ? "_min" : "") + (has_max ? "_max" : "") + "_tensor" + tensor_key;
     auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
-      if (has_min)
+      if (has_min) {
         newCachedGraph->minTensor = mpsGraphRankedPlaceHolder(mpsGraph, min_opt_tensor);
-      if (has_max)
+      }
+      if (has_max) {
         newCachedGraph->maxTensor = mpsGraphRankedPlaceHolder(mpsGraph, max_opt_tensor);
       ;
+      }
 
-      clamp_mps_graph(newCachedGraph, input_t);
+      clamp_mps_graph(newCachedGraph, input_t, min_opt_tensor, max_opt_tensor);
     });
 
     auto inputPlaceholder = Placeholder(cachedGraph->inputTensor, input_t);
     auto outputPlaceholder = Placeholder(cachedGraph->outputTensor, output_t);
@@ -199,7 +221,7 @@ void clamp_scalar_out_mps(const Tensor& input_t,
                                            constantWithScalar:max_scalar
                                                         shape:(mps::getMPSShape(input_t))dataType:(mps::getMPSScalarType(input_t.scalar_type()))];
 
-      clamp_mps_graph(newCachedGraph, input_t);
+      clamp_mps_graph(newCachedGraph, input_t, input_t, input_t);
     });
 
     auto inputPlaceholder = Placeholder(cachedGraph->inputTensor, input_t);
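In short, clamp_mps_graph now receives the min/max tensors so it can compare their dtypes against the input's and, where they differ, feed castMPSTensor copies of the cached bound tensors into the clamp / minimum / maximum graph ops. A rough Python sketch of the intended semantics (the helper below is purely illustrative, not part of the PR or of PyTorch's API):

import torch

def clamp_like_mps_graph(x, lo=None, hi=None):
    # Illustrative only: bounds whose dtype differs from the input are cast
    # to the input dtype before clamping, mirroring the casts added above.
    if lo is not None and lo.dtype != x.dtype:
        lo = lo.to(x.dtype)
    if hi is not None and hi.dtype != x.dtype:
        hi = hi.to(x.dtype)
    if lo is not None and hi is not None:
        return torch.clamp(x, lo, hi)
    if hi is not None:
        return torch.minimum(x, hi)
    if lo is not None:
        return torch.maximum(x, lo)
    return x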
test/test_mps.py (16 additions, 0 deletions)
@@ -5451,6 +5451,22 @@ def helper(n, c, h, w):
 
         helper(1, 1, 4, 5)
 
+    def test_clamp_fp16_fp32(self):
+        cpu_x = torch.randn(10, device='cpu', dtype=torch.float, requires_grad=False)
+        x = cpu_x.detach().clone().to('mps')
+
+        dtype = torch.float16
+
+        clamp_min_vals_mps = torch.ones(10, device="mps").to(torch.float16)
+        clamp_max_vals_mps = torch.ones(10, device="mps").to(torch.float16) * 10
+        clamp_result_mps = torch.clamp(x, clamp_min_vals_mps, clamp_max_vals_mps)
+
+        clamp_min_vals_cpu = torch.ones(10, device="cpu").to(torch.float16)
+        clamp_max_vals_cpu = torch.ones(10, device="cpu").to(torch.float16) * 10
+        clamp_result_cpu = torch.clamp(cpu_x, clamp_min_vals_cpu, clamp_max_vals_cpu)
+
+        self.assertEqual(clamp_result_mps, clamp_result_cpu)
+
     # Test clamp_min
     def test_clamp_min(self):
         def helper(n, c, h, w):
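A related sanity check one might run next to the new test (hypothetical, not part of this PR): confirm that the mixed-dtype clamp also produces the same result dtype on MPS as on CPU, since the fix casts the bounds rather than the input.

import torch

# Hypothetical companion check, not part of this PR. Requires an
# MPS-enabled build; the float32 input dominates the float16 bounds
# under type promotion, so both paths should return float32.
x_cpu = torch.randn(10, dtype=torch.float32)
lo = torch.zeros(10, dtype=torch.float16)
hi = torch.full((10,), 5.0, dtype=torch.float16)

out_cpu = torch.clamp(x_cpu, lo, hi)
out_mps = torch.clamp(x_cpu.to("mps"), lo.to("mps"), hi.to("mps"))
assert out_cpu.dtype == out_mps.dtype == torch.float32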