Lines Matching +full:- +full:- +full:mps
1 # Owner(s): ["module: mps"]
29 import torch.backends.mps
125 # cpu error: grad requires non-empty inputs
161 # Failures due to precision issues (due to fast-math). These have been fixed in MacOS 13.3+
171 …ss for `sort` both are used (values and indices), thus resulting in a mismatch between CPU and MPS.
190 …ss for `sort` both are used (values and indices), thus resulting in a mismatch between CPU and MPS.
196 # Failures due to lack of implementation of downstream functions on MPS backend
200 # Exception: Caused by sample input at index 3 on MPS
204 def addDecorator(op, d) -> None:
225 if key in MACOS_12_3_XFAILLIST_GRAD and (not torch.backends.mps.is_macos13_or_newer()):
230 …if key in MACOS_BEFORE_13_3_XFAILLIST_GRAD and (torch.backends.mps.is_macos13_or_newer() and produ…
501 # Failures due to precision issues (due to fast-math). These have been fixed in MacOS 13.3+
601 # inconsistency errors between cpu and mps, max seen atol is 2
606 # Failures due to precision issues (due to fast-math). These have been fixed in MacOS 13.3+
615 # - CPU output: tensor([2546, 6917, 3181, ..., 7128, 5133, 30], device='cpu')
616 # - MPS output: tensor([2546, 6917, 3181, ..., 7128, 30, 5133], device='mps:0')
635 # inconsistency errors between cpu and mps, max seen atol is 2
641 # on both cpu and mps there are test cases that might produce inf result
646 # - CPU output: tensor([2546, 6917, 3181, ..., 7128, 5133, 30], device='cpu')
647 # - MPS output: tensor([2546, 6917, 3181, ..., 7128, 30, 5133], device='mps:0')
663 # Failures due to lack of op implementation on MPS backend
830 # MPS: input sizes must be divisible by output sizes
840 # Convolution for integral types is not supported on MPS
855 # GEMM on MPS is not supported for integral types
874 # new_zeros/new_ones: Cannot convert a MPS Tensor to float64 dtype as
875 # the MPS framework doesn't support float64
953 # duplicate indices are used in the testcase - undefined behaviour
969 # mps vs cpu:
971 # Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed)
972 # Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed)
975 # Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed)
976 # Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed)
979 # float output for float16 input on MPS
984 # Failures due to lack of implementation of downstream functions on MPS backend
997 # CPU: empty is returning all 0's and there is a mismatch with MPS
1016 def addDecorator(op, d) -> None:
1039 …if key in MACOS_BEFORE_13_3_XFAILLIST and (torch.backends.mps.is_macos13_or_newer() and product_ve…
1044 if key in MACOS_AFTER_13_1_XFAILLIST and torch.backends.mps.is_macos13_or_newer(2):
1054 if key in MACOS_12_3_XFAILLIST and (not torch.backends.mps.is_macos13_or_newer()):
1091 # MPS does not support tensor dimensions > 16
1103 def addDecorator(op, d) -> None:
1116 if not torch.backends.mps.is_available():
1117 print('MPS not available, skipping tests', file=sys.stderr)
1121 product_version = float('.'.join(platform.mac_ver()[0].split('.')[:2]) or -1)
1122 total_memory = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]))
1124 # Determine whether to enable MPS memory leak check (uses same code as CUDA).
1141 caching_allocator_mem_allocated = torch.mps.current_allocated_memory()
1144 torch.mps.empty_cache()
1147 self.caching_allocator_before = torch.mps.current_allocated_memory()
1148 self.driver_before = torch.mps.driver_allocated_memory()
1157 caching_allocator_mem_allocated = torch.mps.current_allocated_memory()
1161 # Short-circuits if no discrepancy detected
1167 torch.mps.empty_cache()
1172 caching_allocator_mem_allocated = torch.mps.current_allocated_memory()
1173 driver_mem_allocated = torch.mps.driver_allocated_memory()
1191 msg = ("MPS caching allocator reports a memory leak not "
1195 … f"MPS driver allocated memory was {self.driver_before} and is now {driver_mem_allocated}.")
1199 msg = (f"MPS driver API confirmed a leak in {self.name}! "
1202 … f"MPS driver allocated memory was {self.driver_before} and is now {driver_mem_allocated}.")
1209 autocast_tensor_A = torch.rand((8, 8), device="mps")
1210 autocast_tensor_B = torch.rand((8, 8), device="mps")
1216 with torch.autocast(device_type="mps"):
1226 f"Autocast & non-autocast tensors did not match, \
1229 # Expand TestCase class with Memory Leak Detection on MPS device
1237 # Wraps the tested method if we should do MPS memory check.
1266 l.append(torch.randn(1024 * 1024 * 8, device=torch.device("mps")))
1272 with self.assertRaisesRegex(RuntimeError, r"MPS driver API confirmed .+"):
1279 x = x.to(device='mps', dtype=torch.float16)
1281 a = torch.randn(128, 128, device='mps', dtype=torch.float16)
1282 # Warm up / prebuild MPS shaders (otherwise check fails on 13.2)
1284 torch.mps.empty_cache()
1285 driver_before = torch.mps.driver_allocated_memory()
1287 torch.mps.empty_cache()
1288 driver_after = torch.mps.driver_allocated_memory()
1289 …self.assertEqual(driver_before, driver_after, f"Detected {driver_after-driver_before} bytes leak o…
1304 input = torch.rand(channels, requires_grad=True, device='mps')
1307 input = torch.rand(width, height, requires_grad=True, device='mps').T
1311 batch_sizes = [random.randint(1, 3) for _ in range(num_input_dims - 3)]
1312 … input = torch.rand(*batch_sizes, channels, width, height, requires_grad=True, device='mps')
1313 input = input.transpose(-1, -2)
1317 if not is_contiguous and len(input.reshape(-1)) > 0:
1327 for c in range(output.size(-3)):
1328 for h in range(output.size(-2)):
1329 for w in range(output.size(-1)):
1364 input = torch.rand(channels, requires_grad=True, device='mps')
1366 input = torch.rand(height, width, requires_grad=True, device='mps')
1368 batch_sizes = [random.randint(1, 3) for _ in range(num_input_dims - 3)]
1369 … input = torch.rand(*batch_sizes, channels, height, width, requires_grad=True, device='mps')
1375 # For 1D - 2D, this is an error case.
1376 # For 3D - 5D, this is a success case for pixel_shuffle + pixel_unshuffle.
1389 num_input_dims=num_input_dims, upscale_factor=-2, is_contiguous=is_contiguous
1396 … _test_pixel_unshuffle_error_case_helper(num_input_dims=num_input_dims, downscale_factor=-2)
1427 np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7,
1456 np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
1459 np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
1465 np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
1466 device="mps")
1468 np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
1469 device="mps")
1470 self._testRelu(np.array([]).astype(t), device="mps")
1471 self._testReluInPlace(np.array([]).astype(t), device="mps")
1476 tensor1_mps = torch.randn(shape_tensor_1, device="mps").expand(expand_tensor_1_shape)
1478 tensor1_mps = torch.randn(shape_tensor_1, device="mps")
1481 tensor2_mps = torch.randn(shape_tensor_2, device="mps").expand(expand_tensor_2_shape)
1483 tensor2_mps = torch.randn(shape_tensor_2, device="mps")
1518 np.array([[-0.09, 0.7, -0.05, 0.3, -0.01],
1519 [0.1, -0.03, 0.5, -0.07, 0.9]]),
1521 np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7,
1527 mps_x = cpu_x.detach().clone().to('mps')
1530 # Transposing will make the tensor non-contiguous
1547 mps_grad = cpu_grad.to('mps')
1552 assert cpu_x.grad is not None # Check that the grad is well-populated
1599 self.assertEqual(actual, expected, rtol=0, atol=1e-5)
1602 # Regression test for gh-36977
1609 x.to('mps'), ceil_mode=True, count_include_pad=True, kernel_size=(1, 2),
1615 def test_exp(self, device="mps", dtype=torch.float):
1616 for v in (2, -2) + ((1j, 1 + 1j) if dtype.is_complex else ()):
1618 a = torch.tensor(v, dtype=dtype, device="mps") * b
1621 def test_conv_raises_error(self, device='mps', dtype=torch.float):
1622 conv = nn.Conv1d(1, 65537, 3, padding=1).to('mps')
1626 y = conv(x.to("mps"))
1628 def test_triu_inf(self, device="mps", dtype=torch.float):
1629 for diag in [-1, 0, 1]:
1630 mask = torch.full((3, 6, 6), float("-inf"))
1631 mask_mps = mask.clone().detach().to('mps')
1636 def test_exp1(self, device="mps", dtype=torch.float):
1637 input = torch.tensor([-0.1, 1.0, -0.9, 0.1], device=device, dtype=dtype)
1640 # If exponentWithTensor: MPS call is used on M1 running 14.5 test will fail with
1642 # Greatest absolute difference: 1.1920928955078125e-07 at index (3,) (up to 1e-08 allowed)
1643 # Greatest relative difference: 1.0786502002702036e-07 at index (3,) (up to 1e-08 allowed)
1644 self.assertEqual(output, output_cpu, atol=1e-8, rtol=1e-8)
1647 x = torch.rand((256, 10), device='mps')
1659 mps_x = torch.from_numpy(np_features).to('mps').requires_grad_()
1668 mps_grad = cpu_grad.to('mps')
1676 np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
1678 device="mps")
1683 tensor = torch.zeros(shape, device='mps', dtype=dtype)
1698 tensor = torch.ones(shape, device="mps")
1708 tensor = torch.ones(shape, device="mps")
1709 val_tensor_mps = torch.tensor(val, device="mps")
1721 def test_cdist_large(self, device="mps"):
1729 def test_cdist_large_batch(self, device="mps"):
1737 def test_cdist_non_contiguous(self, device="mps"):
1763 def test_cdist_non_contiguous_batch(self, device="mps"):
1789 def test_cdist_euclidean_large(self, device="mps"):
1795 eps = 1e-6
1797 x = x - (((x - y) < eps).float() * 2 * eps)
1808 def test_cdist_same_inputs(self, device="mps"):
1816 eps = 1e-6
1826 r1 = x.shape[-2]
1827 r2 = y.shape[-2]
1830 return torch.norm(x[..., None, :] - y[..., None, :, :], p=p, dim=-1)
1832 def test_cdist_norm(self, device="mps"):
1849 def test_cdist_norm_batch(self, device="mps"):
1867 B = torch.ones(5, 6).to("mps")
1868 C = torch.ones(6, 5).to("mps")
1874 device = "mps"
1914 a = torch.randn(4, 3, device="mps")
1915 b = torch.randn(4, 3, device="mps")
1923 A = torch.ones(5, 5).to("mps")
1924 B = torch.ones(5, 6).to("mps")
1925 C = torch.ones(6, 5).to("mps")
1933 batch1_mps = batch1_cpu.detach().clone().to("mps")
1934 batch2_mps = batch2_cpu.detach().clone().to("mps")
1945 batch1 = torch.randn(11, 20064, 128, dtype=dtype, device='mps')
1946 batch2 = torch.randn(11, 128, 20064, dtype=dtype, device='mps')
1951 tol = 1e-2 if dtype == torch.float16 else None
1957 A = torch.ones(5, 10).to("mps")
1958 B = torch.ones(5).to("mps")
1959 C = torch.ones(10).to("mps")
1965 M_mps = M_cpu.detach().clone().to("mps")
1978 M_mps = M_cpu.detach().clone().to("mps")
1979 batch1_mps = batch1_cpu.detach().clone().to("mps")
1980 batch2_mps = batch2_cpu.detach().clone().to("mps")
1996 M_mps = M_cpu.detach().clone().to("mps")
1997 batch1_mps = batch1_cpu.detach().clone().to("mps")
1998 batch2_mps = batch2_cpu.detach().clone().to("mps")
2012 y_mps = x_cpu.to("mps")
2019 x_mps = x.to('mps')
2020 projected_mps = projected.to('mps')
2028 x_mps = x.to('mps')
2029 projected_mps = projected.to('mps')
2042 device = "mps"
2052 # Mixed CPU<->MPS tensors
2056 with self.assertRaisesRegex(RuntimeError, "does not support linear for non-float weights"):
2057 torch.nn.functional.linear(torch.rand(size, device='mps'),
2058 torch.randint(-10, 10, size, dtype=torch.int8, device='mps'))
2061 with self.assertRaisesRegex(RuntimeError, "argument weight is on cpu but expected on mps"):
2062 torch.nn.functional.linear(torch.rand(size, device='mps'),
2066 with self.assertRaisesRegex(RuntimeError, "argument input is on cpu but expected on mps"):
2068 torch.rand(size, device='mps'))
2072 …near = torch.nn.Linear(in_features=in_features, out_features=out_features, device="mps", bias=bias)
2075 mps_linear.weight.data = cpu_linear.weight.data.detach().clone().to("mps")
2078 mps_linear.bias.data = cpu_linear.bias.data.detach().clone().to("mps")
2080 linear_mps_input = torch.randn(shape).to('mps')
2095 grad = cpu_grad.detach().to('mps').requires_grad_()
2101 …self.assertEqual(linear_cpu_input.grad, linear_mps_input.grad.to("cpu"), atol=8e-04, rtol=10.4e-05)
2104 …lf.assertEqual(cpu_linear.weight.grad, mps_linear.weight.grad.to("cpu"), atol=8e-04, rtol=10.4e-05)
2107 … self.assertEqual(cpu_linear.bias.grad, mps_linear.bias.grad.to("cpu"), atol=8e-04, rtol=10.4e-05)
2111 x_grad_out_mps = x_grad_out.to("mps")
2113 w_grad_out_mps = w_grad_out.to("mps")
2121 b_grad_out_mps = b_grad_out.to("mps")
2183 below_low = torch.tensor([-1.0])
2184 self.assertEqual(uniform.log_prob(above_high).item(), -inf)
2185 self.assertEqual(uniform.log_prob(below_low).item(), -inf)
2196 self.assertEqual(low.grad, 1 - rand)
2201 def test_randperm(self, device="mps"):
2226 # Test non-contiguous tensors
2244 x = cpu_x.detach().clone().to('mps').requires_grad_()
2254 grad = cpu_grad.to('mps')
2266 grad = cpu_grad.to('mps')
2312 x = torch.randint(1, 10, size, dtype=torch.float, device='mps', requires_grad=True)
2320 ref_out = x.contiguous().mean((-1, -2)).view((x.size(0), x.size(1), 1, 1))
2338 x_mps = torch.randn(shape, device="mps")
2341 mask_mps = torch.rand(shape, device="mps") < 0.6
2344 y_mps = torch.randn(shape, device="mps")
2358 device = "mps"
2378 x_mps = torch.randn(shape, device="mps")
2380 mask_mps = torch.zeros(shape, device="mps", dtype=torch.bool)
2386 x_mps_strided.masked_fill_(mask_mps.T, float("-inf"))
2387 x_cpu_strided.masked_fill_(mask_cpu.T, float("-inf"))
2390 self.assertFalse((x_mps_strided == float("-inf")).any())
2396 arr = (256 - 128) * np.random.random_sample(size=shape) + 128
2401 x = cpu_x.detach().clone().to('mps').requires_grad_()
2415 arr = (256 - 128) * np.random.random_sample(size=shape) + 128
2420 x = cpu_x.detach().clone().to('mps').requires_grad_()
2428 mean_arr = (240 - 140) * np.random.random_sample(size=mean_shape) + 140
2432 running_mean = cpu_running_mean.detach().clone().to('mps')
2433 running_var = cpu_running_var.detach().clone().to('mps')
2441 weight = cpu_weight.detach().clone().to('mps').requires_grad_()
2443 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
2477 device='mps')
2490 device='mps')
2503 device='mps')
2528 grad = cpu_grad.to('mps')
2553 … helper(shape, eps=1e-05, momentum=0.1, wts=False, training=False, channels_last=channels_last,
2561 … helper(shape, eps=1e-05, momentum=0.1, wts=False, training=True, channels_last=channels_last,
2571 inputs = torch.rand(1, 8, 4, 4, device="mps", requires_grad=True)
2572 x = torch.nn.BatchNorm2d(8).to("mps")
2573 y = torch.nn.BatchNorm2d(8).to("mps")
2583 bn_mps = nn.BatchNorm2d(100, affine=False, device='mps')
2586 x_mps = x_cpu.to('mps')
2594 inputs = torch.rand(4, 4, device="mps", requires_grad=True)
2595 x = torch.nn.LayerNorm(4).to("mps")
2596 y = torch.nn.LayerNorm(4).to("mps")
2604 a = torch.arange(9, dtype=torch.float, device="mps") - 4
2607 a_cpu = torch.arange(9, dtype=torch.float, device="cpu") - 4
2626 c = torch.tensor([[1, 2, 3], [-1, 1, 4]], dtype=torch.float, device="mps")
2627 c_cpu = torch.tensor([[1, 2, 3], [-1, 1, 4]] , dtype=torch.float, device="cpu")
2641 d = torch.arange(8, dtype=torch.float, device="mps").reshape(2, 2, 2)
2653 x_mps = torch.tensor([0, 0, 0, 2, 3], dtype=torch.float, device="mps")
2660 a_mps = torch.arange(27, dtype=torch.float, device="mps") - 4
2661 a_cpu = torch.arange(27, dtype=torch.float, device="cpu") - 4
2681 # TODO: Test non-contiguous
2682 …def helper(input_shape, normalized_shape, eps=1e-05, elementwise_affine=True, dtype=torch.float32):
2684 x = cpu_x.detach().clone().to('mps').requires_grad_()
2687 …erNorm(normalized_shape, eps=eps, elementwise_affine=elementwise_affine, device='mps', dtype=dtype)
2689 wt = cpu_wt.detach().clone().to('mps').requires_grad_()
2691 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
2703 grad = cpu_grad.to('mps')
2720 …torch.nn.LayerNorm((16,), elementwise_affine=True).to("mps")(torch.randn(1, 2, 16).to("mps", dtype…
2725 device = torch.device("mps")
2738 freq_mps = torch.fft.fftfreq(10**4, device='mps')
2746 arr = (256 - 128) * np.random.random_sample(size=shape) + 128
2751 x = cpu_x.detach().clone().to('mps').requires_grad_()
2759 mean_arr = (240 - 140) * np.random.random_sample(size=mean_shape) + 140
2763 running_mean = cpu_running_mean.detach().clone().to('mps')
2764 running_var = cpu_running_var.detach().clone().to('mps')
2772 weight = cpu_weight.detach().clone().to('mps').requires_grad_()
2774 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
2806 device='mps')
2819 device='mps')
2832 device='mps')
2857 grad = cpu_grad.to('mps')
2882 … helper(shape, eps=1e-05, momentum=0.1, wts=False, channels_last=channels_last,
2890 … helper(shape, eps=1e-05, momentum=0.1, wts=False, channels_last=channels_last,
2910 grad = cpu_grad.to('mps')
2923 x = cpu_x.detach().clone().to('mps').requires_grad_()
2926 weight = cpu_weight.detach().clone().to('mps').requires_grad_()
2929 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
2932 linear = torch.nn.Linear(5, 10, device='mps')
2944 x = cpu_x.detach().clone().to('mps').requires_grad_()
2947 conv = torch.nn.Conv2d(3, 3, 3, device='mps')
2958 x = cpu_x.detach().clone().to('mps').requires_grad_()
2961 conv = torch.nn.Conv3d(3, 3, 3, device='mps')
2971 helper(-1, layer='linear')
2977 helper(-1, layer='conv')
2986 helper(-1, layer='conv3d')
2996 x = cpu_x.detach().clone().to('mps').requires_grad_()
2999 wt = cpu_wt.detach().clone().to('mps').requires_grad_()
3006 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
3014 grad = cpu_grad.to('mps')
3019 self.assertEqual(y, ref_y, rtol=2.6e-05, atol=2e-04)
3020 self.assertEqual(x.grad, cpu_x.grad, rtol=2.6e-06, atol=2e-05)
3021 self.assertEqual(wt.grad, cpu_wt.grad, atol=8e-04, rtol=10.4e-05)
3023 self.assertEqual(bias.grad, cpu_bias.grad, atol=8e-04, rtol=10.4e-05)
3070 x = cpu_x.detach().clone().to('mps').requires_grad_()
3073 wt = cpu_wt.detach().clone().to('mps').requires_grad_()
3080 bias = cpu_bias.detach().clone().to('mps').requires_grad_()
3089 grad = cpu_grad.to('mps')
3094 self.assertEqual(y, ref_y, rtol=2.6e-05, atol=2e-04)
3095 self.assertEqual(x.grad, cpu_x.grad, rtol=2.6e-06, atol=2e-05)
3096 self.assertEqual(wt.grad, cpu_wt.grad, atol=8e-04, rtol=10.4e-05)
3134 x = cpu_x.detach().clone().to('mps').requires_grad_()
3142 grad = cpu_grad.to('mps')
3159 x = cpu_x.detach().clone().to('mps').requires_grad_()
3167 grad = cpu_grad.to('mps')
3184 input_mps = input_cpu.detach().clone().to('mps').requires_grad_(requires_grad)
3190 mps_grad = cpu_grad.to('mps')
3201 helper(threshold=-0.1, value=10, num_elems=10)
3202 helper(threshold=0.5, value=-15, num_elems=100)
3210 x = cpu_x.detach().clone().to('mps')
3212 y = cpu_y.detach().clone().to('mps')
3220 x = cpu_x.detach().clone().to('mps')
3230 y = cpu_y.detach().clone().to('mps')
3247 x = cpu_x.detach().clone().to('mps')
3250 y = cpu_y.detach().clone().to('mps')
3253 z = cpu_z.detach().clone().to('mps')
3284 mps_x = cpu_x.detach().clone().to('mps')
3285 mps_y = cpu_y.detach().clone().to('mps')
3286 mps_z = cpu_z.detach().clone().to('mps')
3287 mps_out = cpu_out.detach().clone().to('mps')
3304 shape_t = shape[::-1]
3312 x_mps = x.detach().clone().to(device="mps")
3313 y_mps = y.detach().clone().to(device="mps")
3314 z_mps = z.detach().clone().to(device="mps")
3318 result_mps_out = result_cpu.detach().clone().to('mps')
3335 mps_A = cpu_A.to('mps')
3336 mps_F = cpu_F.to('mps')
3342 mps_x = torch.tensor(values, device='mps')
3351 x = torch.tensor(1).expand([10]).to("mps")
3359 a1 = torch.Tensor([[1, 2], [3, 4], [5, 6]]).to(torch.device("mps"))
3360 b1 = torch.Tensor([-1, -1])
3363 a2 = torch.Tensor([[1, 2], [3, 4], [5, 6]]).to(torch.device("mps"))
3364 b2 = torch.Tensor([-1, -1]).to(torch.device("mps"))
3370 x = torch.randn([1, 4, 4], device="mps")
3381 x = torch.randn([1, 6, 4, 2], dtype=torch.float, device="mps")
3398 lin_mps = nn.Linear(10, 256, device="mps")
3401 lin_mps.weight.data = lin_cpu.weight.data.detach().clone().to("mps").requires_grad_()
3402 lin_mps.bias.data = lin_cpu.bias.data.detach().clone().to("mps").requires_grad_()
3404 x_mps = torch.rand([B, T, 10], device="mps", requires_grad=True)
3412 cls_token_mps = torch.rand([1, 256], device="mps", requires_grad=True).repeat(B, 1, 1)
3433 x_mps = x_cpu.detach().clone().to("mps")
3435 y_cpu = torch.stack((x_cpu[:, :1], x_cpu[:, -1:]), dim=-1)
3436 y_mps = torch.stack((x_mps[:, :1], x_mps[:, -1:]), dim=-1)
3440 t_mps = torch.tensor([1, 2, 3, 4], device="mps")
3449 res_mps = torch.stack((y_mps, x_mps), dim=-1)
3450 res_cpu = torch.stack((y_cpu, x_cpu), dim=-1)
3459 a_mps = a.to("mps")
3467 mps_in = cpu_in.detach().clone().to("mps")
3468 # check copy_cast(uint8 -> bool) on tensors with storage offset
3476 x_mps = torch.randn(1, 4800, 2, device="mps")
3488 t_mps = torch.randn(shape, device="mps")
3542 x = torch.randn(2, 3, 3, device="mps")
3560 idx_mps = idx.to("mps")
3561 pts_mps = pts.to("mps")
3566 actual_pts_mps = torch.zeros(NUM_SAMPLES, X.shape[1], dtype=torch.float, device="mps")
3576 tensor = torch.randint(10, shape, device="mps")
3578 torch.empty(shape[0], shape[1] * 2, device="mps")[:, ::2].copy_(tensor)
3584 mps_x = (torch.tensor(values, device='mps', dtype=torch.float))
3606 x_mps = x_tensor.detach().clone().to("mps")
3624 # Tests for previously encountered MPS bugs
3649 lambda x: x.transpose(0, 1).reshape(-1),
3661 x_mps = x.detach().clone().to("mps")
3674 tensor_list = torch.tensor([1.0, 1.2], device="mps")
3684 tensor_list = torch.tensor([1.0, 1.2, 2.5, 1.0], device="mps")
3699 t_mps = torch.tensor([1, 2, 3, 4], device="mps")
3729 res_mps = torch.stack((y_mps, x_mps), dim=-1)
3730 res_cpu = torch.stack((y_cpu, x_cpu), dim=-1)
3739 x_mps = torch.tensor([0.5, 0.5], device="mps")
3752 a = torch.tensor([8.2670e-01, -1.0293e+00])
3758 …# when copying from 'cpu' to 'mps', c will have a storage_offset of 1 which needs to be taking int…
3760 b = b_cpu.to('mps')
3761 c = c_cpu.to('mps')
3774 mps_x = torch.tensor(values, device='mps')
3793 x = cpu_x.detach().clone().to('mps').requires_grad_()
3799 grad = cpu_grad.to('mps')
3812 def test_torch_repeat_interleave(self, device="mps"):
3819 lengths = torch.tensor([1, 2], dtype=dtype, device="mps")
3838 def test_repeat_interleave(self, device="mps"):
3851 torch.repeat_interleave(torch.tensor([1, 2, -1, 3, 4], device=device))
3891 x = torch.randn(shape, dtype=dtype, device="mps")
3900 helper(shape=3, num_repeats=torch.tensor([100], device="mps"))
3901 helper(shape=(2, 2), num_repeats=torch.tensor([3, 3], device="mps"), dim=0)
3902 helper(shape=(10, 15, 8), num_repeats=torch.arange(10, device="mps"), dim=0)
3903 helper(shape=(10, 15, 8), num_repeats=torch.randint(0, 100, (15, ), device="mps"), dim=1)
3904 helper(shape=(10, 15, 30), num_repeats=torch.randint(0, 100, (30, ), device="mps"), dim=2)
3909 [[1, 0, 2], [3, 0, 2], [7, 9, -4]],
3913 mps_x = torch.tensor(n, dtype=dtype).to('mps')
3915 # All non-zeros
3953 mps_x = torch.tensor(values, device='mps')
3959 self.assertEqual(torch.tensor(1.3, device='mps').int().cpu(),
3961 self.assertEqual(torch.tensor(0.0, device='mps').bool().cpu(), torch.tensor(False))
3962 self.assertEqual(torch.tensor(0.1, device='mps').bool().cpu(), torch.tensor(True))
3963 self.assertEqual(torch.tensor(0.1, device='mps').bool().int().cpu(),
3965 self.assertEqual(torch.tensor(0.1, device='mps').bool().int().float().cpu(),
3967 self.assertEqual(torch.tensor(4.25, device='mps').to('cpu', torch.int),
3969 self.assertEqual(torch.tensor(4.25, device='cpu').to('mps', torch.int).cpu(),
3971 self.assertEqual(torch.tensor(-8.34, device='cpu').to('mps', torch.int),
3972 torch.tensor(-8.34, device='cpu').to('mps').to(torch.int))
3976 cpu_char = torch.tensor([60, -60, 20, -120], dtype=torch.uint8)
3978 x_mps = x_cpu.to('mps')
3982 def test_setitem_scalar(self) -> None:
3983 device = 'mps'
3997 def test_stride_of_strides(self) -> None:
3998 x = torch.rand(32, 1, device='mps')
4001 # See https://github.com/pytorch/pytorch/issues/79181#issuecomment-1154683435
4009 a_mps = a_cpu.to(torch.device('mps'))
4027 a_mps = a_cpu.to(torch.device('mps'))
4048 for i in range(0, n_tensors - 1):
4053 for i in range(0, n_tensors - 1):
4055 t_mps = t.to("mps")
4062 x = torch.full((3, 3), True, device='mps')
4064 y_mps = torch.full((2, 2), 247, device='mps', dtype=torch.uint8)
4073 x = torch.tensor(list(range(1, 11)), device='mps', dtype=dtype)
4079 x = torch.tensor([[1], [0]], dtype=torch.bool, device='mps')
4080 y = torch.tensor([0, 1], dtype=torch.bool, device='mps')
4084 x = torch.tensor([[1], [0]], dtype=torch.int8, device='mps')
4085 y = torch.tensor([0, 1], dtype=torch.int8, device='mps')
4090 x = torch.tensor([[]], device='mps')
4091 y = -x
4133 # tests per-element unique on a higher rank tensor.
4140 def test_unique_all_dtypes(self, device="mps"):
4200 x = cpu_x.detach().clone().to('mps')
4214 x = torch.arange(2, device="mps")
4220 x = cpu_x.detach().clone().to('mps')
4260 mps_data = data.to("mps")
4273 t_mps = torch.tensor(a, device="mps")
4280 y = x.to('mps')
4285 y = x.to('mps')
4289 y = torch.full((4, 4, 4, 4), 13, device="mps")
4292 # As y is on MPS and z on CPU, this dispatches to a copy operator
4299 x_mps = torch.zeros(5, device="mps", dtype=torch.float32)
4301 update_mps = torch.tensor([1, 1], device="mps", dtype=torch.int64)
4314 mps_src = cpu_src.to("mps")
4315 mps_dst = cpu_dst.to("mps")
4333 self.assertEqual(torch.tensor([[1]], device='mps').item(), 1.0)
4339 # Example values for all dtypes supported on the MPS backend
4342 torch.int16: [-15, 0, 1, 10],
4343 torch.int32: [-376, 0, 1, 13],
4344 torch.int64: [-8, 0, 1, 77],
4345 torch.float16: [-234.5, 0.0, 1.0, 2.0],
4346 torch.float32: [-1.0, 0.0, 0.1, 111.99],
4357 # print(getattr(torch.tensor(val1, dtype=dtype1, device='mps'), binop)
4358 # (torch.tensor(val2, dtype=dtype2, device='mps')))
4362 getattr(torch.tensor(val1, dtype=dtype1, device='mps'), binop)
4363 (torch.tensor(val2, dtype=dtype2, device='mps')),
4367 getattr(torch.tensor([val1], dtype=dtype1, device='mps'), binop)
4368 (torch.tensor([val2], dtype=dtype2, device='mps')),
4372 getattr(torch.tensor(val1, dtype=dtype1, device='mps'), binop)
4373 (torch.tensor([val2], dtype=dtype2, device='mps')),
4377 getattr(torch.tensor([val1], dtype=dtype1, device='mps'), binop)
4378 (torch.tensor(val2, dtype=dtype2, device='mps')),
4382 x1 = torch.full(full_shape, val1, dtype=dtype1, device='mps')
4383 y1 = torch.tensor(val2, dtype=dtype2, device='mps')
4387 x3 = torch.tensor(val1, dtype=dtype1, device='mps')
4388 y3 = torch.full(full_shape, val2, dtype=dtype2, device='mps')
4393 getattr(torch.tensor(val1, dtype=dtype1, device='mps'), binop)
4394 (torch.full(full_shape, val2, dtype=dtype2, device='mps')),
4406 low=-scale, high=scale, noncontiguous=noncontiguous)
4415 x_mps = x_cpu.to('mps')
4416 actual_out_mps = torch.empty(0, dtype=dtype, device='mps')
4425 # Test MPS
4445 t = torch.tensor([1, 1, 1, 1], device="mps", dtype=dtype)
4458 self.assertEqual(e_string, "MPS does not support cumsum_out_mps op with int64 input." +
4464 t_mps = a.to("mps").cumsum(0)
4470 # Test with axis -1
4476 x = cpu_x.detach().clone().to('mps')
4478 cpu_y = cpu_x.cumsum(-1)
4479 y = x.cumsum(-1)
4487 t = torch.tensor([1, 1, 1, 1], device="mps", dtype=dtype)
4500 self.assertEqual(e_string, "MPS does not support cumprod_out_mps op with int64 input."
4505 # Test with axis -1
4511 x = cpu_x.detach().clone().to('mps')
4513 cpu_y = cpu_x.cumprod(-1)
4514 y = x.cumprod(-1)
4522 cpu_x = torch.randint(-9999, 9999, shape, device='cpu', dtype=dtype)
4523 x = cpu_x.detach().clone().to('mps')
4535 a = torch.tensor(1., device="mps", requires_grad=True)
4547 ones1 = torch.tensor(values_1, device='mps')
4548 x = cpu_x.detach().clone().to('mps').requires_grad_()
4558 mps_x = cpu_x.to('mps')
4563 strided_cpu_out = strided_cpu1 - strided_cpu2
4564 strided_mps_out = strided_mps1 - strided_mps2
4569 x_mps = torch.arange(1., 8, device="mps")
4579 x = torch.empty((0, 1, 3, 0), dtype=dt, device="mps")
4583 x = torch.tensor(0.5, device="mps")
4584 # unfold on a 0-dimensional tensor should always return a 1-dimensional
4587 self.assertEqual(torch.empty(0, device="mps"), x.unfold(0, 0, 1))
4588 self.assertEqual(torch.empty(0, device="mps"), x.unfold(0, 0, 2))
4589 self.assertEqual(torch.tensor([0.5], device="mps"), x.unfold(0, 1, 1))
4592 input = torch.randint(0, 8, (5,), dtype=torch.int32, device="mps")
4594 weights = torch.linspace(0, 1, steps=5, device="mps", dtype=torch.float32)
4606 device = "mps"
4608 with self.assertRaisesRegex(RuntimeError, '1-d non-negative integral'):
4609 torch.bincount(torch.tensor([1, -1], device=device, dtype=torch.int32))
4610 # n-d input, with n > 1 throws
4611 with self.assertRaisesRegex(RuntimeError, '1-d non-negative integral'):
4617 minlength=-1)
4618 # n-d weights, with n > 1 throws
4619 with self.assertRaisesRegex(RuntimeError, '1-d'):
4626 # 1-d input with no elements and default minlength
4629 # 1-d input with no elements and specified minlength
4660 # test non-contiguous inputs and weights
4664 assert not inputs[:, i].is_contiguous(), "Inputs are supposed to be non-contiguous"
4665 assert not weights[:, i].is_contiguous(), "Weights are supposed to be non-contiguous"
4666 # inputs are non-contiguous but weights are contiguous
4668 # inputs and weights are non-contiguous
4672 # weights are non-contiguous but inputs are contiguous
4676 # test bincount on non-contiguous slices
4683 # test large number of bins - global memory use
4685 big_exp[-1] = 50.0
4696 device = "mps"
4722 x = cpu_x.detach().clone().to('mps').requires_grad_()
4742 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
4743 targetMPS = targetCPU.detach().clone().to('mps')
4771 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
4772 targetMPS = targetCPU.detach().clone().to('mps')
4798 model_mps = copy.deepcopy(model_cpu).to("mps")
4803 x_mps = x.detach().clone().to("mps").permute(0, 2, 1)
4813 y_hat_mps = y_hat.detach().clone().to("mps")
4830 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
4831 targetMPS = targetCPU.detach().clone().to('mps')
4853 target = torch.ones(5, device='mps')
4854 input = torch.ones(5, device='mps')
4857 target = torch.zeros(5, device='mps')
4858 input = torch.zeros(5, device='mps')
4863 a = torch.rand(25, device='mps')
4864 b = torch.rand(25, 1, device='mps')
4871 target = torch.rand(x_size, y_size, device='mps')
4874 output_sig = torch.rand(x_size, y_size, device='mps') - 0.5
4879 weight = torch.rand(y_size, device='mps')
4891 grad = torch.rand(x_size, y_size, device='mps')
4901 output = torch.zeros(3, 1, requires_grad=True, device='mps')
4902 target = torch.zeros(3, 1, device='mps')
4904 expected_grad = torch.empty(3, 1, device='mps').fill_(0.5)
4908 target = torch.rand(16, 4, device='mps')
4909 output = torch.rand(16, 4, device='mps') - 0.5
4911 weight = torch.rand(4, device='mps')
4919 weight = torch.rand(16, 1, device='mps')
4928 target = torch.rand(64, 4, device='mps')
4929 output = torch.rand(64, 4, device='mps') - 0.5
4930 pos_weight = torch.ones(64, 4, device='mps')
4936 target = torch.rand(64, 4, device='mps')
4937 output = torch.rand(64, 4, device='mps') - 0.5
4938 pos_weight = torch.rand(4, device='mps')
4951 output = torch.zeros(3, 1, requires_grad=True, device='mps')
4952 target = torch.zeros(3, 1, device='mps')
4953 pos_weight = torch.ones(3, 1, device='mps')
4955 expected_grad = torch.empty(3, 1, device='mps').fill_(0.5)
4960 output = torch.tensor([0., -120.], device='mps')
4961 target = torch.tensor([0., 1.], device='mps')
4962 pos_weight = torch.tensor([1., 1.], device='mps')
4972 target = torch.rand(16, 4, device='mps')
4973 output = torch.rand(16, 4, device='mps') - 0.5
4975 weight = torch.rand(4, device='mps')
4983 weight = torch.rand(16, 1, device='mps')
4994 pred = torch.randn(3, 5, requires_grad=True, dtype=torch.float16, device='mps')
4995 target = torch.ones(3, dtype=torch.long, device='mps')
5002 mps_x = torch.tensor(values, device='mps', requires_grad=True)
5009 mps_grad = torch.ones_like(cpu_log_softmax).to('mps')
5019 [-10.0, -100.0, -1000.0, -10000.0, -100000.0, -1000000.0]
5022 mps_x = torch.tensor(values, device='mps', requires_grad=True)
5024 cpu_log_softmax = F.log_softmax(cpu_x, dim=-1)
5025 mps_log_softmax = F.log_softmax(mps_x, dim=-1)
5029 mps_grad = torch.ones_like(cpu_log_softmax).to('mps')
5039 mps_x = torch.tensor(values1, device='mps')
5040 mps_y = torch.tensor(values2, device='mps')
5050 cpu_x = torch.tensor((-1, 2, 3), device='cpu', dtype=torch.uint8)
5051 mps_x = torch.tensor((-1, 2, 3), device='mps', dtype=torch.uint8)
5053 self.assertEqual(cpu_x == -1, mps_x == -1)
5054 self.assertEqual(cpu_x > -1, mps_x > -1)
5055 self.assertEqual(cpu_x < -1, mps_x < -1)
5060 mps_x = torch.tensor(values1, device='mps')
5061 mps_y = torch.tensor(values2, device='mps')
5073 mps_x = cpu_x.detach().clone().to('mps')
5074 mps_y = cpu_y.detach().clone().to('mps')
5085 mps_x = cpu_x.detach().clone().to('mps')
5097 mps_x = cpu_x.detach().clone().to('mps')
5098 mps_y = cpu_y.detach().clone().to('mps')
5109 mps_x = cpu_x.detach().clone().to('mps')
5121 mps_x = cpu_x.detach().clone().to('mps')
5122 mps_y = cpu_y.detach().clone().to('mps')
5133 mps_x = cpu_x.detach().clone().to('mps')
5145 mps_x = cpu_x.detach().clone().to('mps')
5146 mps_y = cpu_y.detach().clone().to('mps')
5157 mps_x = cpu_x.detach().clone().to('mps')
5169 mps_x = cpu_x.detach().clone().to('mps')
5170 mps_y = cpu_y.detach().clone().to('mps')
5181 mps_x = cpu_x.detach().clone().to('mps')
5194 mps_tensor = cpu_tensor.to(torch.device('mps'))
5199 mps_tensor = torch.randn(10, 2, device='mps', dtype=torch.float32)
5218 x = cpu_x.detach().clone().to('mps')
5221 x = cpu_x.detach().clone().to('mps')
5224 x = cpu_x.detach().clone().to('mps').requires_grad_()
5273 …x_mps = torch.tensor([sys.maxsize, sys.maxsize - 10, sys.maxsize - 5, sys.maxsize - 18], device="m…
5281 # Note - don't test grad now
5287 x = cpu_x.detach().clone().to('mps')
5290 x = cpu_x.detach().clone().to('mps')
5293 x = cpu_x.detach().clone().to('mps')
5306 y_0 = torch.ones(c, h, w, device='mps', dtype=dtype)
5307 idx_0 = torch.ones(c, h, w, device='mps', dtype=torch.int64)
5313 y_0dim = torch.ones(1, c, h, w, device='mps', dtype=dtype)
5314 idx_0dim = torch.ones(1, c, h, w, device='mps', dtype=torch.int64)
5320 y_1 = torch.ones(n, h, w, device='mps', dtype=dtype)
5321 idx_1 = torch.ones(n, h, w, device='mps', dtype=torch.int64)
5327 y_1dim = torch.ones(n, 1, h, w, device='mps', dtype=dtype)
5328 idx_1dim = torch.ones(n, 1, h, w, device='mps', dtype=torch.int64)
5334 y_2 = torch.ones(n, c, w, device='mps', dtype=dtype)
5335 idx_2 = torch.ones(n, c, w, device='mps', dtype=torch.int64)
5341 y_2dim = torch.ones(n, c, 1, w, device='mps', dtype=dtype)
5342 idx_2dim = torch.ones(n, c, 1, w, device='mps', dtype=torch.int64)
5348 y_3 = torch.ones(n, c, h, device='mps', dtype=dtype)
5349 idx_3 = torch.ones(n, c, h, device='mps', dtype=torch.int64)
5355 y_3dim = torch.ones(n, c, h, 1, device='mps', dtype=dtype)
5356 idx_3dim = torch.ones(n, c, h, 1, device='mps', dtype=torch.int64)
5369 mps_x = cpu_x.detach().clone().to('mps')
5385 mps_x = cpu_x.detach().clone().to('mps')
5426 x = cpu_x.detach().clone().to('mps')
5476 x_mps = fn(torch.zeros(shape, device="mps"), dim=dim)
5484 x = (torch.rand(2, 3, 4, 3, 4, 2, device="mps") - .5).relu()
5486 for i in range(-5, 6):
5508 x = cpu_x.detach().clone().to('mps')
5555 x_mps = x_cpu.to("mps")
5562 x = cpu_x.detach().clone().to('mps')
5573 y_0 = torch.ones(c, h, w, device='mps', dtype=torch.float)
5574 idx_0 = torch.ones(c, h, w, device='mps', dtype=torch.int64)
5585 y_0dim = torch.ones(1, c, h, w, device='mps', dtype=torch.float)
5586 idx_0dim = torch.ones(1, c, h, w, device='mps', dtype=torch.int64)
5597 y_1 = torch.ones(n, h, w, device='mps', dtype=torch.float)
5598 idx_1 = torch.ones(n, h, w, device='mps', dtype=torch.int64)
5609 y_1dim = torch.ones(n, 1, h, w, device='mps', dtype=torch.float)
5610 idx_1dim = torch.ones(n, 1, h, w, device='mps', dtype=torch.int64)
5621 y_2 = torch.ones(n, c, w, device='mps', dtype=torch.float)
5622 idx_2 = torch.ones(n, c, w, device='mps', dtype=torch.int64)
5633 y_2dim = torch.ones(n, c, 1, w, device='mps', dtype=torch.float)
5634 idx_2dim = torch.ones(n, c, 1, w, device='mps', dtype=torch.int64)
5645 y_3 = torch.ones(n, c, h, device='mps', dtype=torch.float)
5646 idx_3 = torch.ones(n, c, h, device='mps', dtype=torch.int64)
5657 y_3dim = torch.ones(n, c, h, 1, device='mps', dtype=torch.float)
5658 idx_3dim = torch.ones(n, c, h, 1, device='mps', dtype=torch.int64)
5673 x = cpu_x.detach().clone().to('mps')
5676 x = cpu_x.detach().clone().to('mps')
5679 x = cpu_x.detach().clone().to('mps').requires_grad_()
5731 x = torch.ones(2, 4, 1, 30, 1, device='mps').sum(dim=-2)
5742 x = cpu_x.detach().clone().to('mps')
5745 x = cpu_x.detach().clone().to('mps')
5748 x = cpu_x.detach().clone().to('mps').requires_grad_()
5773 x = cpu_x.detach().clone().to('mps').requires_grad_()
5826 x = cpu_x.detach().clone().to('mps')
5929 x = cpu_x.detach().clone().to('mps')
5934 zero_dim_var = x.var(-1, keepdim=keepdim, unbiased=unbiased)
5935 zero_dim_var_cpu = cpu_x.var(-1, keepdim=keepdim, unbiased=unbiased)
5954 zero_one_dim_var = torch.var(x, dim=[0, -1], keepdim=keepdim, unbiased=unbiased)
5955 … zero_one_dim_var_cpu = torch.var(cpu_x, dim=[0, -1], keepdim=keepdim, unbiased=unbiased)
5970 x = cpu_x.detach().clone().to('mps').requires_grad_()
5976 grad = cpu_grad.to('mps')
5992 x = cpu_x.detach().clone().to('mps').requires_grad_()
5998 grad = cpu_grad.to('mps')
6015 mps_x = cpu_x.detach().clone().to('mps')
6016 mps_y = cpu_y.detach().clone().to('mps')
6030 x = cpu_x.detach().clone().to('mps')
6034 clamp_min_vals_mps = torch.ones(10, device="mps").to(torch.float16)
6035 clamp_max_vals_mps = torch.ones(10, device="mps").to(torch.float16) * 10
6045 t_mps = torch.tensor([torch.nan, 1, 2], device="mps")
6048 clamp_min_max_mps = torch.clamp(t_mps, min=-100, max=100)
6049 clamp_min_max_cpu = torch.clamp(t_cpu, min=-100, max=100)
6053 clamp_min_mps = torch.clamp(t_mps, min=-100)
6054 clamp_min_cpu = torch.clamp(t_cpu, min=-100)
6067 x = cpu_x.detach().clone().to('mps')
6070 min_t = cpu_min_t.detach().clone().to('mps')
6089 x = cpu_x.detach().clone().to('mps')
6092 max_t = cpu_max_t.detach().clone().to('mps')
6116 x = cpu_x.detach().clone().to('mps')
6121 min_t = cpu_min_t.detach().clone().to('mps')
6126 max_t = cpu_max_t.detach().clone().to('mps')
6148 clamp_result = torch.clamp(x.movedim(0, -1), min=200.0, max=600.0)
6149 clamp_result_cpu = torch.clamp(cpu_x.movedim(0, -1), min=200.0, max=600.0)
6153 … clamp_result = torch.clamp(x.movedim(0, -1), min=min_t.movedim(0, -1), max=max_t.movedim(0, -1))
6154 …amp_result_cpu = torch.clamp(cpu_x.movedim(0, -1), min=cpu_min_t.movedim(0, -1), max=cpu_max_t.mov…
6159 x.movedim(0, -1).clone(memory_format=torch.contiguous_format),
6160 min=min_t.movedim(0, -1),
6161 max=max_t.movedim(0, -1)
6164 cpu_x.movedim(0, -1).clone(memory_format=torch.contiguous_format),
6165 min=cpu_min_t.movedim(0, -1),
6166 max=cpu_max_t.movedim(0, -1)
6188 … cpu_x = torch.randint(-10, 0, shape, device='cpu', dtype=dtype, requires_grad=False)
6189 … cpu_y = torch.randint(-10, 0, shape, device='cpu', dtype=dtype, requires_grad=False)
6191 mps_x = cpu_x.detach().clone().to('mps')
6193 mps_y = cpu_y.detach().clone().to('mps')
6212 mps_x = cpu_x.detach().clone().to('mps')
6235 …torch.tensor([-3, -2, -1, 1, 2, 3], dtype=torch.int32, device="cpu"), torch.tensor(2, device="cpu"…
6237 …torch.tensor([-3, -2, -1, 1, 2, 3], dtype=torch.int32, device="mps"), torch.tensor(2, device="mps"…
6241 torch.tensor([1, 2, 3, 4, 5], dtype=torch.int32, device="cpu"), -1.5)
6243 torch.tensor([1, 2, 3, 4, 5], dtype=torch.int32, device="mps"), -1.5)
6250 x = cpu_x.detach().clone().to('mps')
6263 x = x_cpu.detach().clone().to('mps')
6269 x = cpu_x.detach().clone().to('mps').requires_grad_()
6288 device = 'mps'
6311 inputMPS = inputCPU.detach().to('mps').requires_grad_()
6344 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
6376 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
6401 for mode in ['nearest', 'nearest-exact']:
6407 for mode in ['nearest', 'nearest-exact', 'bilinear']:
6420 x = cpu_x.detach().clone().to('mps')
6423 y = cpu_y.detach().clone().to('mps')
6426 z = cpu_z.detach().clone().to('mps')
6454 x = cpu_x.detach().clone().to('mps')
6456 y = cpu_y.detach().clone().to('mps')
6458 z = cpu_z.detach().clone().to('mps')
6461 x = cpu_x.detach().clone().to('mps')
6463 y = cpu_y.detach().clone().to('mps')
6465 z = cpu_z.detach().clone().to('mps')
6468 x = cpu_x.detach().clone().to('mps').requires_grad_()
6470 y = cpu_y.detach().clone().to('mps').requires_grad_()
6472 z = cpu_z.detach().clone().to('mps').requires_grad_()
6484 # Empty test - Currently failing! Empty tensor not handled!
6491 x = cpu_x.detach().clone().to('mps')
6503 x = cpu_x.detach().clone().to('mps')
6515 x = cpu_x.detach().clone().to('mps')
6527 x = cpu_x.detach().clone().to('mps')
6539 x = cpu_x.detach().clone().to('mps')
6551 x = cpu_x.detach().clone().to('mps')
6554 y = cpu_y.detach().clone().to('mps')
6566 x = cpu_x.detach().clone().to('mps')
6569 y = cpu_y.detach().clone().to('mps')
6581 x = cpu_x.detach().clone().to('mps')
6583 log_result = torch.logsumexp(x, -1)
6584 log_result_cpu = torch.logsumexp(cpu_x, -1)
6595 x = cpu_x.detach().clone().to('mps')
6598 y = cpu_y.detach().clone().to('mps')
6601 z = cpu_z.detach().clone().to('mps')
6604 w = cpu_w.detach().clone().to('mps')
6613 x = cpu_x.detach().clone().to('mps')
6616 y = cpu_y.detach().clone().to('mps')
6619 z = cpu_z.detach().clone().to('mps')
6628 # Empty test - Currently failing! Empty tensor not handled!
6638 x = cpu_x.detach().clone().to('mps')
6651 x = cpu_x.detach().clone().to('mps').requires_grad_()
6657 grad = cpu_grad.to('mps')
6671 x = cpu_x.detach().clone().to('mps').requires_grad_()
6677 grad = cpu_grad.to('mps')
6693 x = cpu_x.detach().clone().to('mps').requires_grad_(True)
6699 grad = cpu_grad.to('mps')
6722 F.elu(elu_input_noncontiguous.to('mps'), alpha, inplace)
6729 x = cpu_x.detach().clone().to('mps').requires_grad_()
6736 grad = cpu_grad.to('mps')
6752 x = cpu_x.detach().clone().to('mps').requires_grad_()
6758 grad = cpu_grad.to('mps')
6780 x = cpu_x.detach().clone().to('mps').requires_grad_()
6786 grad = cpu_grad.to('mps')
6801 input_cast_mps = input.to('mps')
6812 input_mps = input_cpu.to('mps')
6823 x = cpu_x.detach().clone().to('mps').requires_grad_()
6829 grad = cpu_grad.to('mps')
6834 # Test adaptive avg pool2d - when the input size is a multiple of output size
6842 x = cpu_x.detach().clone().to('mps').requires_grad_()
6848 grad = cpu_grad.to('mps')
6879 # Test max avg pool2d - when the input size is a multiple of output size
6891 x = cpu_x.detach().clone().to('mps').requires_grad_()
6904 grad = cpu_grad.to('mps')
6926 x = cpu_x.detach().clone().to('mps')
6929 # Tranposing will make the tensor non-contiguous
6942 grad = cpu_grad.to('mps')
6947 atol = 1e-5 if dtype == torch.float else 1e-2
6948 rtol = 1e-3 if dtype == torch.float else 1e-2
6951 assert x.grad is not None # Check that the grad is well-populated
6961 …tRaises(RuntimeError, lambda: torch.nn.GELU()(torch.randint(100, (2,), dtype=dtype, device="mps")))
6966 x = cpu_x.detach().clone().to('mps')
6969 # Tranposing will make the tensor non-contiguous
6981 grad = cpu_grad.to('mps')
6986 atol = 1e-5 if dtype == torch.float else 1e-2
6987 rtol = 1e-3 if dtype == torch.float else 1e-2
6990 assert x.grad is not None # Check that the grad is well-populated
7005 devices += ['mps']
7032 x = cpu_x.detach().clone().to('mps')
7048 x = cpu_x.detach().clone().to('mps').requires_grad_()
7051 x = cpu_x.detach().clone().to('mps')
7060 grad = cpu_grad.to('mps')
7067 for min_val, max_val in zip([-1, -2, 3], [1, -1, 4]):
7076 input_mps = input_cpu.detach().clone().to('mps').requires_grad_(requires_grad)
7087 mps_grad = cpu_grad.to('mps')
7107 mps_x = torch.tensor(values, device='mps')
7108 mps_x1 = torch.tensor(values1, device='mps')
7117 mps_x = torch.tensor(values, device='mps')
7136 mps_x = torch.tensor(values, device='mps')
7166 x = cpu_x.detach().clone().to('mps').requires_grad_()
7172 grad = cpu_grad.to('mps')
7184 x = cpu_x.clone().to('mps')
7199 x = cpu_x.detach().clone().to('mps').requires_grad_()
7205 grad = cpu_grad.to('mps')
7215 # See https://github.com/pytorch/pytorch/issues/98074#issuecomment-1496088337
7216 x = torch.arange(18.0, device='mps').reshape(2, 3, 3)
7225 x = cpu_x.detach().clone().to('mps')
7228 idx = cpu_idx.detach().clone().to('mps')
7231 source = cpu_source.detach().clone().to('mps')
7242 helper((2, 3, 3), -1, [1, 2], (2, 3, 2), 6.0)
7255 torch.mps.empty_cache()
7257 x = torch.rand(16000, 67120, device="mps")
7259 idx = torch.arange(0, 2, device="mps")
7265 torch.mps.empty_cache()
7269 x = torch.rand(10, 1, device="mps")
7270 y = torch.rand(1, 32769, device="mps")
7273 # See https://github.com/pytorch/pytorch/issues/116769#issuecomment-1888302095
7277 x = torch.rand(m, n, device="mps", dtype=dtype)
7278 y = torch.rand(n, k, device="mps", dtype=dtype)
7283 … # Used to produce incorrect results with MPS on M1 running MacOS 14.3, but correct with Metal
7286 # see https://github.com/pytorch/pytorch/issues/116769#issuecomment-1920066984
7298 y = x.to(device="mps")
7299 self.assertTrue(torch.all(y == torch.tensor(1.0, device="mps")))
7307 x = cpu_x.detach().clone().to('mps')
7317 helper((2, 3, 3), (-1,))
7331 x = cpu_x.detach().clone().to('mps')
7334 idx = cpu_idx.detach().clone().to('mps')
7346 helper((2, 3, 3), -1, [1, 2])
7353 x = cpu_x.detach().clone().to('mps')
7356 idx = cpu_idx.detach().clone().to('mps')
7369 embeddingMPS = nn.Embedding(n, d, max_norm=True, device='mps')
7371 W_MPS = torch.randn((m, d), requires_grad=True, device='mps')
7372 idx_MPS = torch.tensor(idx, device='mps')
7375 b_MPS = embeddingMPS(idx_MPS) @ W_MPS.t() # modifies weight in-place
7386 b_CPU = embeddingCPU(idx_CPU) @ W_CPU.t() # modifies weight in-place
7403 x = cpu_x.detach().clone().to('mps').requires_grad_()
7409 idx = cpu_idx.detach().clone().to('mps')
7415 grad = cpu_grad.to('mps')
7436 x = cpu_x.detach().clone().to('mps').requires_grad_()
7441 idx = cpu_idx.detach().clone().to('mps')
7447 grad = cpu_grad.to('mps')
7458 x = cpu_x.detach().clone().to('mps').requires_grad_()
7461 src = cpu_src.detach().clone().to('mps').requires_grad_()
7475 idx = cpu_idx.detach().clone().to('mps')
7492 grad = cpu_grad.to('mps')
7525 x = cpu_x.detach().clone().to('mps').requires_grad_()
7528 src = cpu_src.detach().clone().to('mps').requires_grad_()
7534 idx = cpu_idx.detach().clone().to('mps')
7550 grad = cpu_grad.to('mps')
7565 x = cpu_x.detach().clone().to('mps').requires_grad_()
7568 src = cpu_src.detach().clone().to('mps').requires_grad_()
7574 idx = cpu_idx.detach().clone().to('mps')
7600 self.assertFalse(torch.is_nonzero(torch.tensor([0.]).to('mps')))
7601 self.assertTrue(torch.is_nonzero(torch.tensor([1.5]).to('mps')))
7602 self.assertFalse(torch.is_nonzero(torch.tensor([False]).to('mps')))
7603 self.assertTrue(torch.is_nonzero(torch.tensor([3]).to('mps')))
7609 x = cpu_x.detach().clone().to('mps').requires_grad_()
7615 grad = cpu_grad.to('mps')
7627 helper((2, 8, 4, 5), diag=-1)
7628 helper((2, 8, 4, 5), diag=-2)
7629 helper((2, 8, 4, 5), diag=-3)
7635 mps_input = cpu_input.to('mps')
7650 x = cpu_x.detach().clone().to('mps').requires_grad_()
7656 grad = cpu_grad.to('mps')
7668 helper((2, 8, 4, 5), diag=-1)
7669 helper((2, 8, 4, 5), diag=-2)
7670 helper((2, 8, 4, 5), diag=-3)
7680 result = torch.eye(n, dtype=dtype, device='mps')
7683 result = torch.eye(n, m, device='mps')
7699 x = cpu_x.detach().clone().to('mps').requires_grad_()
7705 # grad = cpu_grad.to('mps')
7714 for diag in [0, 1, 2, 3, 4, -1, -2, -3, -4]:
7721 result = torch.linspace(start, end, steps, dtype=dtype, device='mps')
7732 self.assertEqual(np.arange(10), torch.arange(10, device='mps'))
7733 self.assertEqual(np.arange(7, 1, -1), torch.arange(7, 1, -1, device='mps'))
7734 … self.assertEqual(np.arange(1, 2, .3, dtype=np.float32), torch.arange(1, 2, .3, device='mps'))
7735 self.assertEqual(np.arange(6.3, dtype=np.float32), torch.arange(6.3, device='mps'))
7738 out_mps = torch.tensor([], device="mps")
7747 self.assertEqual(np.arange(11, dtype=np.float32), torch.range(0, 10, device='mps'))
7748 self.assertEqual(np.arange(7, 0, -1, dtype=np.float32), torch.range(7, 1, -1, device='mps'))
7749 …(np.array([1.0000, 1.3000, 1.6000, 1.9000], dtype=np.float32), torch.range(1, 2, .3, device='mps'))
7750 self.assertEqual(np.arange(6.3, dtype=np.float32), torch.arange(0, 6.3, device='mps'))
7759 x = cpu_x.detach().clone().to('mps').requires_grad_()
7770 grad = cpu_grad.to('mps')
7781 x = cpu_x.detach().clone().to('mps').requires_grad_()
7787 grad = cpu_grad.to('mps')
7801 for dim in [0, 1, 2, 3, -1, -2, -3]:
7805 inputCPU = torch.tensor([float('nan'), float('inf'), -float('inf'), 3.14])
7806 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
7807 outputCPU = torch.nan_to_num(inputCPU, nan=2.0, posinf=1.0, neginf=-1.0)
7808 outputMPS = torch.nan_to_num(inputMPS, nan=2.0, posinf=1.0, neginf=-1.0)
7816 cond = cpu_cond.detach().clone().to('mps')
7819 x = cpu_x.detach().clone().to('mps').requires_grad_()
7822 y = cpu_y.detach().clone().to('mps').requires_grad_()
7828 grad = cpu_grad.to('mps')
7850 # TODO: Remove me when out OpInfo testing is enabled on MPS
7851 output = torch.tensor(0.0, device="mps")
7852 cond = torch.randint(2, (3, 3), dtype=torch.bool, device="mps")
7853 inp = torch.rand(3, 3, device="mps")
7854 other = torch.rand(3, 3, device="mps")
7862 mps_out = torch.normal(mean, std, shape, device='mps')
7867 mean_tensor = cpu_mean_tensor.detach().clone().to('mps')
7872 std_tensor = cpu_std_tensor.detach().clone().to('mps')
7875 mps_out = torch.zeros(shape, device='mps')
7878 mps_out = torch.zeros(shape, device='mps')
7881 mps_out = torch.zeros(shape, device='mps')
7900 all_ones = torch.ones(shape, device='mps')
7901 all_zeros = torch.zeros(shape, device='mps')
7921 mps_out = torch.zeros(shape, device='mps', dtype=dtype).bernoulli(0.5)
7925 self.assertEqual(uniq, torch.arange(2, device='mps', dtype=dtype))
7931 # explicit manual seeding by creating an MPS Generator
7932 g_mps = torch.Generator(device='mps')
7934 mps_x = torch.randn(5, device='mps', generator=g_mps)
7937 mps_y = torch.randn(5, device='mps', generator=g_mps)
7944 mps_x = torch.randn(5, device='mps', generator=g_mps)
7953 mps_x = torch.randn(5, device='mps', generator=g_mps)
7958 # manual seeding on the "default" MPS generator using
7961 mps_x = torch.randn(5, device='mps')
7962 # manual seeding using torch.mps.manual_seed()
7963 # which should set the "default" MPS generator
7965 torch.mps.manual_seed(230)
7967 mps_y = torch.randn(5, device='mps')
7972 g_state = torch.mps.get_rng_state()
7975 mps_x = torch.randn(5, device='mps')
7979 self.assertEqual(torch.mps._get_default_mps_generator().get_offset(), 2)
7984 …# restore the previously saved state to the "default" MPS generator, and the results should match …
7985 torch.mps.set_rng_state(g_state)
7986 mps_x = torch.randn(5, device='mps')
7991 # MPS stream to finish running each of them
7993 .to(device='mps', dtype=torch.float)
7995 x = torch.rand(1, 128, 6, 6, device='mps', dtype=torch.float, requires_grad=True)
7996 torch.mps.synchronize()
7998 torch.mps.synchronize()
8000 torch.mps.synchronize()
8006 torch.mps.empty_cache()
8008 current_alloc_before = torch.mps.current_allocated_memory()
8013 driver_alloc_before = torch.mps.driver_allocated_memory()
8015 x = torch.ones(1024 * 1024 * 8, device="mps")
8017 current_alloc_after = torch.mps.current_allocated_memory()
8018 driver_alloc_after = torch.mps.driver_allocated_memory()
8025 max_memory = torch.mps.recommended_max_memory()
8031 # the os_signposts->PyTorchMPS and check if events or intervals are logged
8035 with torch.mps.profiler.profile(mode="event", wait_until_completed=False) as p:
8038 .to(device='mps', dtype=torch.float)
8039 x = torch.rand(1, 128, 6, 6, device='mps', dtype=torch.float, requires_grad=True)
8042 torch.mps.profiler.start(mode="interval", wait_until_completed=True)
8044 x = torch.rand(1, 128, 6, 6, device='mps', dtype=torch.float, requires_grad=True)
8046 torch.mps.profiler.stop()
8049 startEvent = torch.mps.Event(enable_timing=True)
8052 .to(device='mps', dtype=torch.float)
8053 x = torch.rand(1, 128, 6, 6, device='mps', dtype=torch.float, requires_grad=True)
8055 endEvent = torch.mps.Event(enable_timing=True)
8062 m.x = torch.rand(3, 3, device='mps')
8073 mps_out = torch.randint(low, high, shape, dtype=dtype, device='mps')
8088 x = torch.empty(10, 10, dtype=dtype, device='mps')
8096 mps_out = torch.zeros(shape, device='mps', dtype=dtype)
8128 mps_x = cpu_x.detach().clone().to('mps')
8131 mps_y = cpu_y.detach().clone().to('mps')
8137 tol = 2e-3 if dtype is torch.float16 else None
8139 if not (cpu_y.shape != () and inplace): # in-place output cannot be broadcasted.
8142 mps_s = cpu_s.detach().clone().to('mps')
8147 mps_s = cpu_s.detach().clone().to('mps')
8165 x = cpu_x.detach().clone().to('mps')
8168 y = cpu_y.detach().clone().to('mps')
8173 tol = 1e-3 if dtype is torch.float16 else None
8182 # see https://github.com/pytorch/pytorch/issues/79835#issuecomment-1164984534
8183 x = torch.ones(4, dtype=torch.int32, device='mps')
8184 self.assertEqual(x + 1, torch.full((4,), 2, dtype=torch.int32, device='mps'))
8185 self.assertTrue(torch.equal(x + 1.5, torch.full((4,), 2.5, device='mps')))
8190 …rch.arange(5, dtype=torch.float32, device="mps") * torch.tensor([True, False, True, False, True], …
8194 …ps_y = torch.arange(5, dtype=torch.float32, device="mps") * torch.tensor([1, 0, 1, 0, 1], device="…
8201 mps_x = cpu_x.detach().clone().to('mps')
8206 mps_x = cpu_x.to('mps')
8207 self.assertEqual(op(cpu_x), op(mps_x), rtol=1e-4, atol=1e-4)
8211 mps_x = cpu_x.detach().clone().to('mps')
8218 mps_x = cpu_x.detach().clone().to('mps')
8220 combined_dim = shape[-1] * shape[-2]
8221 reshaped_dims = list(shape[:-2]) + [combined_dim]
8238 mps_x = cpu_x.detach().clone().to('mps')
8243 mps_x = cpu_x.to('mps')
8244 self.assertEqual(op(cpu_x[::2]), op(mps_x[::2]), rtol=1e-4, atol=1e-4)
8254 # test in-place with storage_offset
8256 mps_x = cpu_x.detach().clone().to('mps')
8265 mps_x = cpu_x.detach().clone().to('mps')
8267 mps_y = cpu_y.detach().clone().to('mps')
8282 input_mps = input_cpu.detach().clone().to("mps")
8285 other_mps = other_cpu.detach().clone().to("mps")
8300 prob_tensor = cpu_prob_tensor.detach().clone().to('mps')
8311 helper(np.array([[0., 0., 0., 0.5, 0.5]]), (3 + 4) / 2, (12.5 - 3.5 ** 2), 100000)
8312 helper(np.array([[.2, .2, .2, .2, .2]]), (0 + 1 + 2 + 3 + 4) / 5, (6 - 2 * 2), 10000)
8313 helper(np.array([[1, 1, 1, 1, 1]]), (0 + 1 + 2 + 3 + 4) / 5, (6 - 2 * 2), 10000)
8314 helper(np.array([1, 1, 1, 1, 1]), (0 + 1 + 2 + 3 + 4) / 5, (6 - 2 * 2), 10000)
8318 x = torch.rand((3, 3), device="mps")
8319 self.assertEqual(x.cumsum(1), x.cumsum(-1))
8320 self.assertEqual(x.cumsum(0), x.cumsum(-2))
8322 self.assertRaises(IndexError, lambda: x.cumsum(-3))
8325 x = torch.rand((3, 3), device="mps")
8326 self.assertEqual(x.cumprod(1), x.cumprod(-1))
8327 self.assertEqual(x.cumprod(0), x.cumprod(-2))
8329 self.assertRaises(IndexError, lambda: x.cumprod(-3))
8338 x = cpu_x.detach().clone().to('mps')
8356 x = cpu_x.detach().clone().to('mps')
8359 other = cpu_other.detach().clone().to('mps')
8379 x = cpu_x.detach().clone().to('mps')
8382 other = cpu_other.detach().clone().to('mps')
8403 x = cpu_x.detach().clone().to('mps')
8406 other = cpu_other.detach().clone().to('mps')
8428 x = torch.randn((30, 15), device='mps', dtype=dtype)
8430 x = torch.randint(0, 100, (30, 15), device="mps", dtype=dtype)
8446 mps_x = cpu_x.detach().clone().to('mps')
8480 A_mps = A.clone().detach().to('mps')
8481 B_mps = B.clone().detach().to('mps')
8498 A = torch.randn(size=[1, 4], device='mps', dtype=torch.float32)
8499 B = torch.randn(size=[1, 4], device='mps', dtype=torch.float16)
8504 C = torch.randn(size=[1, 4], device='mps', dtype=torch.float32)
8516 # MPS
8517 input_mps = input_cpu.detach().clone().to('mps').requires_grad_()
8518 target_mps = target_cpu.detach().clone().to('mps')
8548 def test_nll_loss_mismatched_batch(self, device='mps'):
8562 t2 = torch.tensor([0, 1, 1, 0, -100, 2], dtype=torch.int64, device=device)
8571 output_mps = test_nll_loss_out_of_bounds_ignore_index_helper(device='mps')
8573 for cpu, mps in zip(output_cpu, output_mps):
8574 self.assertEqual(cpu, mps)
8587 _test_nll_loss_invalid_target_dim(device='mps')
8605 _test_nll_loss_invalid_weights(device='mps')
8616 # MPS
8617 input_mps = input.detach().clone().to('mps').requires_grad_()
8618 target_mps = target.detach().clone().to('mps')
8619 weights_mps = weights.to("mps")
8636 # MPS
8637 input_mps = input.detach().clone().to('mps').requires_grad_()
8638 target_mps = target.detach().clone().to('mps')
8683 for dev in ['cpu', 'mps']:
8690 prob = F.log_softmax(input_, dim=-1)
8702 self.assertEqual(result_long['mps'].to('cpu'), result_long['cpu'])
8703 self.assertEqual(grad_long['mps'].to('cpu'), grad_long['cpu'])
8708 x = cpu_x.detach().clone().to('mps')
8748 def __init__(self) -> None:
8754 def __init__(self) -> None:
8767 m = self._create_basic_net()[-1]
8801 m = torch.load(path, encoding='utf-8')
8806 device = 'mps'
8809 tmp_kernel = kernel.expand(-1, 3, -1, -1)
8815 M_mps = M_cpu.to('mps')
8825 print(torch.ones(100, 100, device='mps').nonzero())
8826 print(torch.ones(100, 100, device='mps').nonzero().contiguous())
8891 … module = nn.Conv1d(in_channels=3, out_channels=6, kernel_size=3, stride=-1, bias=True).to(dtype)
8893 with self.assertRaisesRegex(RuntimeError, 'non-positive stride is not supported'):
8923 … module = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=4, stride=-1, bias=True).to(dtype)
8925 with self.assertRaisesRegex(RuntimeError, 'non-positive stride is not supported'):
8931 with self.assertRaisesRegex(RuntimeError, 'non-positive stride is not supported'):
8937 … lambda: torch.conv2d(torch.rand(1, 3, 32, 32), torch.rand(1, 3, 3, 3, device='mps')))
8940 … lambda: torch.conv2d(torch.rand(1, 3, 32, 32, device='mps'), torch.rand(1, 3, 3, 3)))
8943 def test_conv2d_valid_padding(self, device='mps'):
8954 x = torch.rand(1, 1, 10, 10, device="mps", requires_grad=True)
8955 m1 = nn.Conv2d(1, 1, 3, stride=2, padding=1).to("mps")
8956 m2 = nn.Conv2d(1, 1, 4, stride=2, padding=1).to("mps")
8966 x = torch.rand(1, 1, 10, 10, 20, device="mps", requires_grad=True)
8967 m1 = nn.Conv3d(1, 1, 3, stride=2, padding=1).to("mps")
8968 m2 = nn.Conv3d(1, 1, 4, stride=2, padding=1).to("mps")
8982 def transpose_for_scores(x: torch.Tensor) -> torch.Tensor:
8983 new_x_shape = x.size()[:-1] + (num_attention_heads, attention_head_size)
8989 res = key.transpose(-1, -2)
8993 A_mps = A.detach().clone().to("mps")
8996 r2 = attention2(A_mps, device="mps")
9001 def test_group_norm_backward(self, device='mps'):
9010 norm = nn.GroupNorm(32, 128, eps=1e-6, affine=True, device=device)
9023 loss = (out - target).norm(dim=-1).sum()
9024 grad = -torch.autograd.grad(loss, x)[0]
9028 # def test_conv2d_same_padding(self, device='mps'):
9050 m = torch.nn.ConstantPad2d((-2, -2, -2, -2), 3.5)
9052 input_mps = input_cpu.detach().clone().to("mps")
9061 input_mps = input_cpu.detach().clone().to("mps")
9069 k_mps = k_cpu.detach().clone().to("mps")
9072 x_mps = x_cpu.detach().clone().to("mps")
9084 inputMPS = inputCPU.detach().clone().to('mps')
9093 inputMPS = inputCPU.detach().clone().to('mps').requires_grad_()
9137 # pad dims < input dims - 2
9181 def test_addmm(self, device="mps", dtype=torch.float32):
9217 def test_addr(self, device="mps", dtype=torch.float32):
9229 def test_matrix_rank(self, device="mps", dtype=torch.float32):
9259 out = torch.empty(a.shape[:-2], dtype=torch.int64, device=device)
9274 … "The operator 'aten::_linalg_svd.U' is not currently implemented for the MPS device."):
9277 def test_pinv(self, device="mps", dtype=torch.float32, precision=1e-4):
9281 # Testing against definition for pseudo-inverses
9288 …self.assertEqual(np_A @ np_A_pinv, (np_A @ np_A_pinv).conj().swapaxes(-2, -1), atol=precision, rto…
9289 …self.assertEqual(np_A_pinv @ np_A, (np_A_pinv @ np_A).conj().swapaxes(-2, -1), atol=precision, rto…
9291 self.assertEqual(A.shape, A_pinv.shape[:-2] + (A_pinv.shape[-1], A_pinv.shape[-2]))
9305 … rconds.append(torch.rand(A.shape[:-2], dtype=torch.float32, device=device).to(rcond_type))
9308 rconds.append(torch.rand(A.shape[-3], device=device))
9329 A = random_hermitian_pd_matrix(sizes[-1], *sizes[:-2], dtype=dtype, device=device)
9338 … "The operator 'aten::_linalg_eigh.eigenvalues' is not currently implemented for the MPS device."):
9345 … "The operator 'aten::_linalg_eigh.eigenvalues' is not currently implemented for the MPS device."):
9357 a_f32 = torch.rand((m, k), device="mps")
9358 b_f32 = torch.rand((k, n), device="mps")
9364 b_int32 = b_int32.to("mps")
9365 b_scales_and_zeros = b_scales_and_zeros.to("mps")
9386 mean_err = ((res - ref).abs() / ref).mean()
9394 a_f32 = torch.rand((m, k), device="mps")
9395 b_f32 = torch.rand((n, k), device="mps")
9399 b, -128, 127, torch.int8
9414 mean_err = ((res - ref).abs() / ref).mean()
9420 denom = torch.maximum(ref.abs(), torch.tensor([1e-6], device=ref.device, dtype=ref.dtype))
9421 err = ((y - ref).abs() / denom).mean().item()
9437 q = torch.randn([1, NH, L, HS], dtype=dtype, device="mps", requires_grad=requires_grad)
9438 k = torch.randn([1, NH, S, HS], dtype=q.dtype, device="mps")
9439 v = torch.randn([1, NH, S, HS], dtype=q.dtype, device="mps")
9484 causal_mask = torch.tril(torch.ones(S, S, dtype=torch.bool, device='mps'))
9488 q = torch.randn([1, NH, L, HS], dtype=dtype, device="mps")
9489 k = torch.randn([1, NH, S, HS], dtype=q.dtype, device="mps")
9490 v = torch.randn([1, NH, S, HS], dtype=q.dtype, device="mps")
9492 input_pos = torch.tensor([i], dtype=torch.int32, device='mps')
9517 x_mps = torch.zeros(10, dtype=torch.float32, device="mps")
9529 s = torch.tensor(input, dtype=torch.uint8, device="mps").unsqueeze(0)
9547 x_mps = x_cpu.to('mps')
9560 a_mps = torch.ones((2, 2),).to(torch.device("mps"))
9561 b_mps = torch.ones((2, 2),).to(torch.device("mps"))
9585 mps_x = cpu_x.detach().clone().to('mps')
9602 if base.device.type == 'mps':
9621 def test_diagonal_view(self, device="mps"):
9629 t = torch.ones((3, 3, 3), device="mps")
9636 def test_select_view(self, device="mps") -> None:
9644 def test_unbind_view(self, device="mps") -> None:
9654 def test_expand_view(self, device="mps") -> None:
9662 def test_expand_as_view(self, device="mps"):
9671 def test_narrow_view(self, device="mps"):
9679 def test_permute_view(self, device="mps") -> None:
9687 def test_transpose_view(self, device="mps"):
9696 def test_transpose_inplace_view(self, device="mps"):
9718 def test_t_view(self, device="mps"):
9728 t_mps = torch.ones((2, 6,), device='mps')[1].reshape(2, 3)
9734 def test_t_inplace_view(self, device="mps"):
9742 def test_T_view(self, device="mps"):
9751 def test_unfold_view(self, device="mps"):
9759 def test_squeeze_view(self, device="mps"):
9766 def test_squeeze_inplace_view(self, device="mps"):
9774 def test_unsqueeze_view(self, device="mps"):
9782 def test_unsqueeze_inplace_view(self, device="mps"):
9790 def test_as_strided_view(self, device="mps"):
9798 def test_as_strided_inplace_view(self, device="mps"):
9806 def test_view_view(self, device="mps"):
9814 def test_view_as_view(self, device="mps"):
9823 def test_contiguous_self(self, device="mps"):
9828 def test_contiguous_nonview(self, device="mps"):
9836 def test_reshape_view(self, device="mps"):
9844 def test_reshape_as_view(self, device="mps"):
9853 def test_reshape_nonview(self, device="mps"):
9861 def test_flatten_view(self, device="mps"):
9873 # zero-dimensional tensor
9887 # [--1--|---2---|-3-] [--1--|----2---|-3-]
9898 def test_flatten_nonview(self, device="mps"):
9918 def test_basic_indexing_slice_view(self, device="mps"):
9926 def test_basic_indexing_ellipses_view(self, device="mps"):
9934 def test_basic_indexing_newaxis_view(self, device="mps"):
9942 def test_chunk_view(self, device="mps"):
9952 def test_split_view(self, device="mps"):
9962 def test_movedim_view(self, device="mps"):
9985 def test_view_copy(self, device="mps"):
10002 def test_view_copy_out(self, device="mps"):
10021 def test_detached_view_copy(self, device="mps"):
10025 # with non-zero offset
10030 def test_empty_reshape(self, device="mps"):
10033 # should be viewable -- i.e. data_ptr is the same.
10036 # match NumPy semantics -- don't infer the size of dimension with a degree of freedom
10037 self.assertRaises(RuntimeError, lambda: x.reshape(0, -1))
10039 def test_expand(self, device="mps"):
10054 # test non-contiguous
10065 # test -1 as target size
10066 self.assertEqual(tensor.expand(4, -1, 5), tensor.expand(4, 8, 5))
10067 self.assertRaises(RuntimeError, lambda: tensor2.expand(-1, -1))
10072 def test_view_empty(self, device="mps"):
10076 def test_reshape(self, device="mps"):
10078 self.assertEqual(x.data_ptr(), x.reshape(-1).data_ptr())
10081 self.assertRaises(RuntimeError, lambda: x.reshape(-1, -1))
10086 self.assertNotEqual(y.data_ptr(), y.reshape(-1).data_ptr())
10087 self.assertEqual(y.contiguous().view(-1), y.reshape(-1))
10092 self.assertEqual(s.reshape(-1).shape, (1,))
10096 self.assertEqual(empty, empty.reshape(-1))
10098 # TODO: fix these once we have multi-dimensional empty tensors
10100 self.assertEqual(empty.reshape([1, -1]).shape, (1, 0))
10108 def test_narrow(self, device="mps"):
10113 self.assertEqual(x.narrow(0, -1, 1), torch.tensor([[6, 7, 8]]))
10114 self.assertEqual(x.narrow(0, -2, 2), torch.tensor([[3, 4, 5], [6, 7, 8]]))
10115 self.assertEqual(x.narrow(0, -3, 3), torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]]))
10116 self.assertEqual(x.narrow(-1, -1, 1), torch.tensor([[2], [5], [8]]))
10117 self.assertEqual(x.narrow(-2, -1, 1), torch.tensor([[6, 7, 8]]))
10119 def test_narrow_tensor(self, device="mps"):
10129 def test_t(self, device="mps"):
10156 def test_split(self, device="mps"):
10190 def test_chunk(self, device="mps"):
10208 tensor.chunk(-2)
10210 def test_unsqueeze(self, device="mps") -> None:
10225 def test_big_transpose(self, device="mps"):
10231 def test_T(self, device="mps"):
10239 def test_transposes(self, device="mps", dtype=torch.float32):
10249 t2 = t2.transpose(-2, -1)
10250 if op[-1] == "H" or op == "adjoint":
10254 def test_transposes_errors(self, device="mps", dtype=torch.float32):
10264 def test_python_types(self, device="mps"):
10278 def test_resize_as_preserves_strides(self, device="mps"):
10284 def test_memory_format_resize_as(self, device="mps"):
10285 def test_helper(shape, memory_format, device="mps"):
10291 test_helper((10, 3, 32, 32), torch.channels_last, device="mps")
10292 test_helper((3, 10, 3, 32, 32), torch.channels_last_3d, device="mps")
10294 def test_memory_format_resize_(self, device="mps"):
10295 def test_helper(shape, numel, memory_format, device="mps"):
10300 test_helper((10, 3, 32, 32), 10 * 3 * 32 * 32, torch.channels_last, device="mps")
10301 test_helper((3, 10, 3, 32, 32), 3 * 10 * 3 * 32 * 32, torch.channels_last_3d, device="mps")
10305 # 0-dim
10311 # 1-dim
10317 # 2,3,4-dim
10326 def test_atleast_gradient(self, device="mps"):
10331 def test_view(self, device="mps"):
10339 self.assertEqual(tensor.view(-1, 5).size(), target)
10340 self.assertEqual(tensor.view(3, -1).size(), target)
10349 self.assertEqual(empty.view(-1).size(), torch.Size([0]))
10350 self.assertEqual(empty.view(10, 3, -1).size(), torch.Size([10, 3, 0]))
10352 …with self.assertRaisesRegex(RuntimeError, r"because the unspecified dimension size -1 can be any v…
10353 empty.view(-1, 0)
10355 …with self.assertRaisesRegex(RuntimeError, r"because the unspecified dimension size -1 can be any v…
10356 empty.view(3, 0, -1, 0)
10359 self.assertRaises(RuntimeError, lambda: tensor.view(7, -1))
10360 self.assertRaises(RuntimeError, lambda: tensor.view(15, -1, -1))
10362 def test_contiguous(self, device="mps"):
10371 def test_resize_mps_dtypes(self, device="mps"):
10378 def test_resize_as_mps_dtypes(self, device="mps"):
10385 def test_resize_overflow(self, device="mps"):
10392 def test_view_all_dtypes_and_devices(self, device="mps"):
10403 conv_gpu = copy.deepcopy(conv_cpu).to(device='mps')
10406 y_gpu = y_cpu.to(device='mps')
10421 a_mps = a_cpu.detach().clone().to("mps")
10422 model_mps = model_cpu.to("mps")
10425 self.assertEqual(out_cpu, out_mps.cpu(), rtol=2.6e-05, atol=2e-04)
10433 deconv_gpu = copy.deepcopy(deconv_cpu).to(device='mps')
10436 y_gpu = y_cpu.to(device='mps')
10450 device = 'mps'
10453 self.assertEqual(tcpu, tgpu.cpu(), rtol=2.6e-05, atol=2e-04)
10461 …_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, groups=groups).to("mps")
10462 … conv_mps.weight.data = conv_cpu.weight.data.detach().clone().to("mps").requires_grad_(True)
10463 conv_mps.bias.data = conv_cpu.bias.data.detach().clone().to("mps").requires_grad_(True)
10468 … x_mps = data.permute(0, 2, 1).detach().clone().to("mps").contiguous().requires_grad_(True)
10475 self.assertEqual(conv_cpu.weight.grad, conv_mps.weight.grad, rtol=2.6e-05, atol=2e-04)
10490 a_mps = a_cpu.detach().clone().to("mps")
10491 model_mps = model_cpu.to("mps")
10501 x_mps = x_cpu.detach().clone().to(device='mps').requires_grad_()
10514 …_channels=N, out_channels=C, kernel_size=H, groups=groups, stride=(strideX, strideY), device="mps")
10515 … conv_mps.weight.data = conv_cpu.weight.data.detach().clone().to("mps").requires_grad_()
10516 … conv_mps.bias.data = conv_cpu.bias.data.detach().clone().to("mps").requires_grad_()
10520 self.assertEqual(res_cpu, res_mps.cpu(), rtol=1e-03, atol=1e-05)
10523 self.assertEqual(res_cpu, res_mps, rtol=2.6e-05, atol=2e-04)
10525 … self.assertEqual(conv_cpu.weight.grad, conv_mps.weight.grad, rtol=2.6e-05, atol=2e-04)
10539 m_mps.weight.data = m_cpu.weight.data.detach().clone().to("mps").requires_grad_()
10540 m_mps.bias.data = m_cpu.bias.data.detach().clone().to("mps").requires_grad_()
10543 input_mps = input_cpu.detach().clone().to("mps")
10553 # non-square kernels and unequal stride and with padding
10558 input_mps = input_cpu.detach().clone().to("mps")
10561 downsample_mps = nn.Conv2d(16, 16, 3, stride=2, padding=1, device="mps")
10562 …downsample_mps.weight.data = downsample_cpu.weight.data.detach().clone().to("mps").requires_grad_()
10563 … downsample_mps.bias.data = downsample_cpu.bias.data.detach().clone().to("mps").requires_grad_()
10566 upsample_mps = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1, device="mps")
10567 … upsample_mps.weight.data = upsample_cpu.weight.data.detach().clone().to("mps").requires_grad_()
10568 upsample_mps.bias.data = upsample_cpu.bias.data.detach().clone().to("mps").requires_grad_()
10585 y_gpu = y_cpu.to(device='mps')
10588 conv_gpu = copy.deepcopy(conv_cpu).to(device='mps')
10591 self.assertEqual(x_cpu, x_gpu.cpu(), rtol=1e-03, atol=1e-05)
10597 y_gpu = y_cpu.to(device='mps')
10600 conv_gpu = copy.deepcopy(conv_cpu).to(device='mps')
10603 self.assertEqual(x_cpu, x_gpu.cpu(), rtol=1e-03, atol=1e-05)
10644 # grid_sample uses AVX for 2d images, but that requires 32-bit indexing for
10645 # 32-bit floats. So we also have a fallback that is used only for float tensors
10646 # requiring 64-bit indexing. That requires too much memory to run on CI, so we
10656 self.assertEqual(out_fallback, out_cpu.float(), atol=1e-5, rtol=5e-5)
10660 … self.assertEqual(input_fallback.grad, input_cpu.grad.float(), atol=1e-4, rtol=5e-5)
10661 … self.assertEqual(grid_fallback.grad, grid_cpu.grad.float(), atol=1e-4, rtol=5e-5)
10663 …input_mps = input_cpu.detach().transpose(0, 1).to("mps").transpose(0, 1).requires_grad_(input_requ…
10664 grid_mps = get_grid('mps', grid_cpu.detach()).requires_grad_()
10667 out_mps.backward(gradients.to("mps"))
10670 self.assertEqual(grid_cpu.grad, grid_mps.grad, atol=5e-5, rtol=0)
10672 # check that zero-dimensional input strides don't error out
10678 … input_mps = base_input.to("mps").expand_as(input_mps).requires_grad_(input_requires_grad)
10740 input = torch.arange(1., 11, device="mps").view(1, 1, 2, 5)
10742 [[[-0.9, -4.1], [0, 0.2000], [1, -1], [-0.333, 1e-6], [0.5, 1.0]],
10743 … [[-1.0, -0.5], [0, 0.3333], [1, -1], [-0.200, 1e-6], [1.5, 0.5]]], device="mps").view(1, 2, 5, 2)
10749 … [2.2500, 6.3332500450, 5.0000, 5.1000, 0.0000]], device="mps").view(1, 1, 2, 5)
10753 … [0.5000, 7.1665000916, 1.2500, 5.0000000000, 0.0000]], device="mps").view(1, 1, 2, 5)
10758 … [2.2500, 6.3332500450, 5.0000, 5.1000, 8.7500]], device="mps").view(1, 1, 2, 5)
10762 … [1.0000, 7.1665000916, 5.0000, 5.0000000000, 10.0000]], device="mps").view(1, 1, 2, 5)
10767 … [2.2500, 6.3332500450, 5.0000, 5.1000, 7.7500]], device="mps").view(1, 1, 2, 5)
10771 … [1.0000000000, 7.1665000916, 5.0000, 5.0000000000, 9.2500]], device="mps").view(1, 1, 2, 5)
10779 [1., 8., 5., 8., 0.]], device="mps").view(1, 1, 2, 5)
10783 [1., 8., 5., 8., 0.]], device="mps").view(1, 1, 2, 5)
10788 [1., 8., 5., 8., 10.]], device="mps").view(1, 1, 2, 5)
10792 [1., 8., 5., 8., 10.]], device="mps").view(1, 1, 2, 5)
10797 [1., 8., 5., 8., 9.]], device="mps").view(1, 1, 2, 5)
10801 [1., 8., 5., 8., 9.]], device="mps").view(1, 1, 2, 5)
10808 [[-0.10424726, 7.1400003, 5.0000, 5.7842274, 9.0000],
10809 … [2.4492188, 7.4814040, 5.0000, 6.0277520, 0.0000]], device="mps").view(1, 1, 2, 5)
10813 … [0.40625, 8.0288770, 1.0625, 5.9375067, -0.3515625]], device="mps").view(1, 1, 2, 5)
10818 … [2.1328125, 6.4258375, 5.0000, 5.076003, 8.8671875]], device="mps").view(1, 1, 2, 5)
10822 … [0.906250, 7.2822485, 4.625, 5.0000052, 10.00000]], device="mps").view(1, 1, 2, 5)
10827 … [1.7812500, 6.703594, 5.0000, 5.0760007, 8.21875]], device="mps").view(1, 1, 2, 5)
10831 … [0.8125000, 7.2822485, 4.25, 5.0000052, 9.332031]], device="mps").view(1, 1, 2, 5)
10839 self.assertEqual(output, groundtruth, atol=1e-5, rtol=0,
10849 device = "mps"
10859 device = "mps"
10897 device = "mps"
10930 device = "mps"
10953 device = "mps"
10959 # Test that MPS doesn't crash if nonzero called concurrently
10961 x = torch.rand(3, 3, device="mps")
10970 x = torch.rand(16, 16, device='mps', dtype=torch.float16)
10975 x_mps = x.to("mps")
10988 x_mps = x_cpu.detach().clone().to("mps")
10998 x_mps = x_cpu.detach().clone().to("mps")
11001 rows_mps = rows_cpu.detach().clone().to("mps")
11004 cols_mps = cols_cpu.detach().clone().to("mps")
11016 x_mps = x_cpu.detach().clone().to("mps")
11032 x_mps = x_cpu.detach().clone().to("mps")
11039 # MPS support binary op with uint8 natively starting from macOS 13.0
11046 x_mps = x_cpu.detach().clone().to("mps")
11077 x_mps = x_cpu.detach().clone().to("mps")
11082 out_tensor_mps = torch.tensor([88, 99], dtype=dtype, device="mps")
11123 target_mps = torch.zeros([5, 3], device="mps", dtype=dtype)
11126 indices_mps = torch.tensor([[0, 1], [0, 1]], dtype=torch.int64, device="mps")
11129 value_mps = torch.ones(indices_mps.shape[0], device="mps", dtype=dtype)
11139 def test_advancedindex_big(self, device="mps"):
11145 def test_set_item_to_scalar_tensor(self, device="mps"):
11155 def test_single_int(self, device="mps"):
11159 def test_multiple_int(self, device="mps"):
11164 def test_none(self, device="mps"):
11171 def test_step(self, device="mps"):
11179 def test_step_assignment(self, device="mps"):
11185 def test_bool_indices(self, device="mps"):
11201 def test_bool_indices_accumulate(self, device="mps"):
11208 def test_multiple_bool_indices(self, device="mps"):
11215 def test_byte_mask(self, device="mps"):
11226 def test_byte_mask_accumulate(self, device="mps"):
11235 def test_index_put_accumulate_expanded_values(self, device="mps"):
11263 values1d = torch.tensor([-1.0, -2.0])
11264 values2d = torch.tensor([[-1.0, -2.0], ])
11274 def test_index_put_accumulate_non_contiguous(self, device="mps"):
11292 def test_index_put_accumulate_with_optional_tensors(self, device="mps"):
11311 out_mps = func(t_dev, indices_dev, value0d.to("mps"))
11315 out_mps = func(t_dev, indices_dev, value1d.to("mps"))
11319 def test_index_put_accumulate_duplicate_indices(self, device="mps"):
11323 delta = torch.empty(i, dtype=torch.float32, device=device).uniform_(-1, 1)
11325 indices = delta.cumsum(0).long().to("mps")
11327 # abs for int64 is not supported on mps, fallback on 'cpu' to calculate it
11328 input = torch.randn(indices.cpu().abs().max().to("mps") + 1, device=device)
11340 def test_index_put_deterministic(self, device="mps"):
11363 with self.assertRaisesRegex(AssertionError, "Tensor-likes are not equal!"):
11368 def test_multiple_byte_mask(self, device="mps"):
11378 def test_byte_mask2d(self, device="mps"):
11385 def test_jit_indexing(self, device="mps"):
11403 def test_int_indices(self, device="mps"):
11416 [helper(device="mps", dtype=dtype) for dtype in [torch.float, torch.int32]]
11434 …[helper(device="mps", dtype=dtype) for dtype in [torch.float, torch.float16, torch.long, torch.boo…
11436 def test_int_indices2d(self, device="mps"):
11443 def test_int_indices_broadcast(self, device="mps"):
11451 def test_empty_index(self, device="mps"):
11458 y[idx] = -1
11462 y[mask] = -1
11465 def test_empty_ndim_index(self, device="mps"):
11479 def test_empty_ndim_index_bool(self, device="mps"):
11483 def test_empty_slice(self, device="mps"):
11492 def test_index_getitem_copy_bools_slices(self, device="mps"):
11506 def test_index_setitem_bools_slices(self, device="mps"):
11515 neg_ones = torch.ones_like(a) * -1
11533 def test_index_scalar_with_bool_mask(self, device="mps"):
11544 def test_setitem_expansion_error(self, device="mps"):
11547 # check prefix with non-1s doesn't work
11555 def test_getitem_scalars(self, device="mps"):
11559 # non-scalar indexed with scalars
11579 def test_setitem_scalars(self, device="mps"):
11582 # non-scalar indexed with scalars
11603 def test_basic_advanced_combined(self, device="mps"):
11619 def test_int_assignment(self, device="mps"):
11628 def test_byte_tensor_assignment(self, device="mps"):
11642 def test_variable_slicing(self, device="mps"):
11648 def test_ellipsis_tensor(self, device="mps"):
11657 def test_invalid_index(self, device="mps"):
11661 def test_out_of_bound_index(self, device="mps"):
11670 def test_zero_dim_index(self, device="mps"):
11680 def test_cpu_indices(self, device="mps"):
11691 def test_nextafter(self, device="mps"):
11693 x = torch.tensor([1, -1, 0, 0, 2, -2], device=device, dtype=dtype)
11694 y = torch.tensor([2, -2, -1, 1, -3, 3], device=device, dtype=dtype)
11698 # greater is broken on MPS, see https://github.com/pytorch/pytorch/issues/125051
11780 … f"mismatch in cpu:{cpu_name} vs mps:{mps_name}, layers: {num_layers}")
11793 def test_lstm_forward(self, device="mps", dtype=torch.float32):
11798 def test_lstm_backward(self, device="mps", dtype=torch.float32):
11805 cell = cell_module(input_size, hidden_size, device='mps')
11816 input = torch.randn(3, input_size, device='mps')
11817 bad_hx = torch.randn(1, hidden_size, device='mps')
11818 good_hx = torch.randn(3, hidden_size, device='mps')
11835 input = torch.randn(3, 10, device='mps')
11836 hx = torch.randn(3, 20, device='mps')
11837 cx = torch.randn(3, 20, device='mps')
11838 lstm = nn.LSTMCell(10, 20, bias=bias, device='mps')
11845 input = torch.randn(3, 11, device='mps')
11846 hx = torch.randn(3, 20, device='mps')
11847 cx = torch.randn(3, 20, device='mps')
11848 lstm = nn.LSTMCell(10, 20, device='mps')
11852 input = torch.randn(3, 10, device='mps')
11853 hx = torch.randn(3, 21, device='mps')
11854 cx = torch.randn(3, 20, device='mps')
11855 lstm = nn.LSTMCell(10, 20, device='mps')
11861 # TODO: Remove once test_testing.py is running on MPS devices
11864 [sys.executable, "-W", "always", "-c", "import torch"],
11868 cwd=os.path.dirname(os.path.realpath(__file__)),).decode("utf-8")
11875 [torch.tensor([1], device='mps'), torch.tensor([2], device='mps')], {},
11876 "torch.lcm(torch.tensor([1], device='mps'), torch.tensor([2], device='mps'))")
11881 … with self.assertRaisesRegex(NotImplementedError, "not currently implemented for the MPS device"):
11909 [sys.executable, '-W', 'always', '-c', script],
11917 e.output.decode("utf-8"))
11923 e.output.decode("utf-8"))
11927 a = torch.ones(1, device="mps")
11928 b = torch.zeros(1, device="mps")
11932 with self.assertRaisesRegex(AssertionError, "Tensor-likes are not close!"):
11938 # with self.assertRaisesRegex(AssertionError, "Tensor-likes are not close!"):
11942 with self.assertRaisesRegex(TypeError, "the MPS framework doesn't support float64"):
11943 a = torch.ones(2, dtype=torch.float64, device="mps")
11945 a = torch.ones(2, device="mps")
11946 with self.assertRaisesRegex(TypeError, "the MPS framework doesn't support float64"):
11950 a = torch.ones(2, device="mps")
11956 # Ensures that cpu Tensor can be loaded on mps
11962 x2 = torch.load(f, map_location="mps")
11965 self.assertEqual(x2.device.type, "mps")
11967 # Ensures that mps Tensors can be loaded on mps
11969 x = torch.rand(2, device="mps")
11976 self.assertEqual(x2.device.type, "mps")
11978 # Ensures that mps Tensors can be loaded on cpu
11980 x = torch.rand(2, device="mps")
11989 # Ensures that `mps:0` Tensors can be loaded on mps
11991 x = torch.rand(2, device="mps:0")
11995 x2 = torch.load(f, map_location="mps:0")
11998 self.assertEqual(x2.device.type, "mps")
12079 return (1e-4, 3e-5)
12082 return (1e-2, 1e-2)
12089 return (5e-2, 5e-2)
12091 return (7e-4, 2e-3)
12093 return (1e-4, 1.3e-5)
12097 return (1e-6, 2e-3 if dtype == torch.float16 else 4e-6)
12099 return (1e-3, 1e-4)
12103 return (3e-5, 3e-5)
12119 # TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
12129 …lambda x: x.detach().to("mps").requires_grad_(x.requires_grad) if isinstance(x, torch.Tensor) else…
12160 # TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
12171 …lambda x: x.detach().to("mps").requires_grad_(x.requires_grad) if isinstance(x, torch.Tensor) else…
12190 atol = 7e-4
12191 rtol = 1.5e-3
12220 mps_grad_outputs = tuple(t.to("mps") for t in cpu_grad_outputs)
12236 self.assertEqual(device, "mps:0")
12238 # TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
12262 # Allocate tensors on mps
12263 with torch.device("mps"):
12265 self.assertTrue(all(x.device.type == "mps" for x in inputs))
12278 # Copied from `TestCommon` in `test_ops.py`, just enough to duplicate the `test_numpy_ref` for MPS
12301 # This is the MPS equivalent of `test_numpy_ref` from `test_ops.py`. It lives over here while
12302 # MPS still requires some fairly heavy special casing in the test framework.
12303 # When MPS becomes more consistent, this can probably be merged with that test using
12306 # MPS only supports float32
12309 …ike `test_numpy_ref`, this test compares in `float32` since at the time of this test's creation MPS
12315 # TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
12336 # TODO: Actually instantiate that test for the "mps" device to better reflect what it is doing.
12337 # This requires mps to be properly registered in the device generic test framework which is not the
12341 instantiate_device_type_tests(TestErrorInputs, globals(), allow_mps=True, only_for="mps")
12342 instantiate_device_type_tests(TestCommon, globals(), allow_mps=True, only_for="mps")
12343 instantiate_device_type_tests(TestLinalgMPS, globals(), allow_mps=True, only_for="mps")