[MPS] Fix sliced cast (#138535)

pytorchbot · malfet · web-flow · commit f31b8bbc5b88 · 2024-10-22T16:25:25.000-07:00
[MPS] Fix sliced cast (#138314) This fixes an internal crash caused by an invalid buffer size computation when the sliced API is used. Not sure what the purpose was of ```c++ IntArrayRef baseShape; if (src.is_view()) { baseShape = src._base().sizes(); } else { baseShape = getIMPSAllocator()->getBufferShape(src.storage().data()); } int flattenedShaped = 1; for (const auto i : c10::irange(baseShape.size())) { flattenedShaped *= baseShape[i]; } ``` as `flattenedShaped` could be computed much more easily as `[srcBuf length]/src.element_size()`, and even if `srcBuf` is padded it's a safe thing to do. When someone allocated a buffer to hold, say, uint8 and then view-casted it to float16, the attempt to compute `baseShape` returned the sizes of the original tensor in its original data type, rather than the size in the new dtype. Fixes #137800 Pull Request resolved: #138314 Approved by: https://github.com/albanD, https://github.com/DenisVieriu97 (cherry picked from commit de16159) Co-authored-by: Nikita Shulga <nikita.shulga@gmail.com>
diff --git a/aten/src/ATen/native/mps/OperationUtils.mm b/aten/src/ATen/native/mps/OperationUtils.mm
@@ -542,18 +542,9 @@ void printTensorNDArray(const Tensor& t) {
     MPSShape* mpsShape = getMPSShape(_tensor);
     MPSShape* mpsStrides = getMPSShape(_tensor.strides());
 
-    IntArrayRef baseShape;
-    if (src.is_view()) {
-      baseShape = src._base().sizes();
-    } else {
-      baseShape = getIMPSAllocator()->getBufferShape(src.storage().data());
-    }
-    int flattenedShaped = 1;
-    for (const auto i : c10::irange(baseShape.size())) {
-      flattenedShaped *= baseShape[i];
-    }
-    MPSShape* mpsBaseShape = @[ @(flattenedShaped) ];
-    MPSNDArrayDescriptor* srcTensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType shape:mpsBaseShape];
+    auto storage_numel = src.storage().nbytes() / src.element_size();
+    MPSNDArrayDescriptor* srcTensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType
+                                                                                 shape:@[ @(storage_numel) ]];
     srcTensorDesc.preferPackedRows = YES;
     MPSNDArray* srcNDArray = [[[MPSNDArray alloc] initWithBuffer:srcBuf
                                                           offset:src.storage_offset() * src.element_size()
diff --git a/test/test_mps.py b/test/test_mps.py
@@ -10964,6 +10964,12 @@ def test_nonzero_multi_threading(self):
         t1.start()
         t2.start()
 
+    def test_sliced_view_cast(self):
+        # This used to crash on MacOS Sequoia
+        # See https://github.com/pytorch/pytorch/issues/137800
+        x = torch.rand(16, 16, device='mps', dtype=torch.float16)
+        y = x[:, 0:2].view(torch.float32) + 1
+
     def test_masked_select(self):
         x = torch.randn(3, 4)
         x_mps = x.to("mps")