/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <algorithm>
#include <cmath>
#include <limits>
#include <map>
#include <typeindex>
#include <variant>

#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
#include <gtest/gtest.h>

using namespace ::testing;
using exec_aten::MemoryFormat;
using exec_aten::optional;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::testing::TensorFactory;

// To further verify the accuracy of op_to, we test the conversion from
// floating-point types to signed int types directly against test cases
// generated by core PyTorch. That data is randomly generated in [-5, 5].

// clang-format off
typedef std::map<
    std::type_index,
    std::variant<
        std::vector<float>,
        std::vector<double>,
        std::vector<exec_aten::Half>,
        std::vector<exec_aten::BFloat16>>>
    FloatingTypeToDataMap;

typedef std::map<
    std::type_index,
    std::variant<
        std::vector<int64_t>,
        std::vector<int32_t>,
        std::vector<int16_t>,
        std::vector<int8_t>,
        std::vector<uint8_t>>>
    IntTypeToDataMap;
// clang-format on

class OpToTest : public OperatorTest {
 protected:
  Tensor& op_to_copy_out(
      const Tensor& self,
      bool non_blocking,
      optional<MemoryFormat> memory_format,
      Tensor& out) {
    return torch::executor::aten::_to_copy_outf(
        context_, self, non_blocking, memory_format, out);
  }

  // Cast an INPUT_CTYPE vector to an OUTPUT_CTYPE vector.
  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  std::vector<OUTPUT_CTYPE> vector_type_cast(std::vector<INPUT_CTYPE> input) {
    std::vector<OUTPUT_CTYPE> output(input.size());
    std::transform(
        input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) {
          return static_cast<OUTPUT_CTYPE>(x);
        });
    return output;
  }

  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  struct ToTestCase {
    const std::vector<int32_t> sizes;
    const std::vector<INPUT_CTYPE> data_in;
    const std::vector<OUTPUT_CTYPE> data_out;
  };

  // Each test uses a different combination of input and output types, so
  // creating templated test cases with custom data types for both the input
  // and the output data would get messy. We take another approach: all
  // test-case data is stored as double and cast to the desired type when
  // passed to the tf.make function. In our experiments, core PyTorch's type
  // cast matches C++ static_cast within the representable range, so we
  // believe using static_cast to generate the ground truth is reasonable.
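  // For example, a (Float -> Int) instantiation of the runner below flows
  // like this (a sketch with illustrative values, not an exhaustive case):
  //
  //   std::vector<double> src = {2.11, 3.2};                     // ground truth
  //   auto data_in = vector_type_cast<double, float>(src);       // {2.11f, 3.2f}
  //   auto data_out = vector_type_cast<float, int32_t>(data_in); // {2, 3}
  //
  // data_in populates the input tensor and data_out the expected tensor.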
  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_static_cast(
      std::vector<ToTestCase<double, double>> test_cases) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    for (auto test_case : test_cases) {
      auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case.data_in);
      auto data_out = vector_type_cast<INPUT_CTYPE, OUTPUT_CTYPE>(data_in);

      Tensor input = tf_in.make(test_case.sizes, data_in);
      Tensor output = tf_out.zeros_like(input);

      Tensor ret = op_to_copy_out(
          /*self=*/input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          output);

      Tensor expected = tf_out.make(test_case.sizes, data_out);

      // The returned tensor should share its values with both the out
      // argument and the expected tensor.
      EXPECT_TENSOR_EQ(ret, output);
      EXPECT_TENSOR_EQ(ret, expected);
    }
  }

  template <typename INPUT_CTYPE, ScalarType INPUT_DTYPE>
  void test_runner_to_bool(
      std::vector<double> test_case,
      std::vector<bool> data_out) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<ScalarType::Bool> tf_out;

    auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case);

    Tensor input = tf_in.make({(int)test_case.size()}, data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to output
    // should be the same.
    EXPECT_TENSOR_EQ(ret, output);
    // The return value of op_to_copy_out and the reference values in
    // expected should be the same.
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <typename OUTPUT_CTYPE, ScalarType OUTPUT_DTYPE>
  void test_runner_from_bool(
      std::vector<bool> test_case,
      std::vector<double> out) {
    TensorFactory<ScalarType::Bool> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    auto data_out = vector_type_cast<double, OUTPUT_CTYPE>(out);

    Tensor input = tf_in.make({(int)test_case.size()}, test_case);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to output
    // should be the same.
    EXPECT_TENSOR_EQ(ret, output);
    // The return value of op_to_copy_out and the reference values in
    // expected should be the same.
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_hardcode_data(
      FloatingTypeToDataMap floating_point_data,
      IntTypeToDataMap int_data) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) {
      // The negative inputs would underflow when cast to uint8_t, so skip
      // this output type.
      return;
    }

    ToTestCase<INPUT_CTYPE, OUTPUT_CTYPE> test_case = {
        /*sizes=*/{3, 5},
        /*data_in=*/
        std::get<std::vector<INPUT_CTYPE>>(
            floating_point_data[typeid(INPUT_CTYPE)]),
        /*data_out=*/
        std::get<std::vector<OUTPUT_CTYPE>>(int_data[typeid(OUTPUT_CTYPE)])};

    Tensor input = tf_in.make(test_case.sizes, test_case.data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make(test_case.sizes, test_case.data_out);

    // The returned tensor should share its values with both the out argument
    // and the expected tensor.
    EXPECT_TENSOR_EQ(ret, output);
    EXPECT_TENSOR_EQ(ret, expected);
  }

  /* %python
  import torch
  torch.manual_seed(0)
  x = torch.rand(2, 3)
  res = x.to(non_blocking = False, memory_format = torch.preserve_format)
  op = "op_to_copy_out"
  opt_setup_params = """
    bool non_blocking = false;
    optional<MemoryFormat> memory_format;
  """
  opt_extra_params = "non_blocking, memory_format,"
  out_args = "out_shape, dynamism"
  dtype = "ScalarType::Float"
  check = "EXPECT_TENSOR_EQ" */

  void test_dynamic_shape(
      const std::vector<int32_t>& out_shape,
      enum torch::executor::TensorShapeDynamism dynamism) {
    /* %python
    %rewrite(unary_op) */

    TensorFactory<ScalarType::Float> tf;

    Tensor x = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});
    Tensor expected = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});

    bool non_blocking = false;
    optional<MemoryFormat> memory_format;

    Tensor out = tf.zeros(out_shape, dynamism);
    op_to_copy_out(x, non_blocking, memory_format, out);
    EXPECT_TENSOR_EQ(out, expected);
  }
};

/* Here we deliberately do not implement or test the behavior of casting a
 * value that cannot be represented in the destination type (e.g. inf to
 * int32_t, nan to int64_t, or 2147483648 to int32_t), because:
 * - a. The result of such a cast is undefined according to the C++ standard.
 * - b. No explicit rules for such a conversion can be found in core PyTorch
 *      (it does not match static_cast or any other C++ casting function).
 * - c. Users who cast an unrepresentable value to a certain type take that
 *      risk themselves.
 * - d. Even though we could always use if/switch to cover these boundary
 *      cases, the code would be lengthy and jumbled. Writing such convoluted
 *      code to chase undefined behavior is pointless, and we could never
 *      cover all such cases anyway.
 */
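// For illustration only (a sketch of the undefined behavior described above;
// none of this is compiled or tested here): per the C++ standard, a
// floating-to-integral conversion whose truncated value cannot be represented
// in the destination type is undefined, e.g.
//
//   static_cast<int32_t>(std::numeric_limits<float>::infinity()); // UB
//   static_cast<int64_t>(NAN);                                    // UB
//   static_cast<int32_t>(2147483648.0); // UB: exceeds INT32_MAX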
namespace {} // namespace

// Regular tests for to_copy.out.
// Test whether to_copy.out works well under all kinds of data pairs.
TEST_F(OpToTest, AllDtypesSupported) {
  std::vector<ToTestCase<double, double>> test_cases = {
      {
          /*sizes=*/{2, 4},
          /*data_in=*/{2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3},
          /*data_out=*/{}, // data_out shouldn't be used in
                           // test_runner_static_cast
      },
      {
          /*sizes=*/{3, 4, 0, 5},
          /*data_in=*/{},
          /*data_out=*/{},
      },
      {
          /*sizes=*/{},
          /*data_in=*/{10.0},
          /*data_out=*/{}, // data_out shouldn't be used in
                           // test_runner_static_cast
      },
  };

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_REALHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, BoolTests) {
  std::vector<double> test_case_to_bool = {1.1, 2.2, 0};
  std::vector<bool> result_to_bool = {true, true, false};

#define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE)               \
  test_runner_to_bool<INPUT_CTYPE, ScalarType::INPUT_DTYPE>( \
      test_case_to_bool, result_to_bool);

  ET_FORALL_REALHBF16_TYPES(TEST_TO_BOOL);

  std::vector<bool> test_case_from_bool = {true, true, false};
  std::vector<double> result_from_bool = {1.0, 1.0, 0};

#define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE)                \
  test_runner_from_bool<OUTPUT_CTYPE, ScalarType::OUTPUT_DTYPE>(  \
      test_case_from_bool, result_from_bool);

  ET_FORALL_REALHBF16_TYPES(TEST_FROM_BOOL);
}

TEST_F(OpToTest, NanInfSupported) {
  constexpr auto floatInfinity = std::numeric_limits<float>::infinity();

  std::vector<ToTestCase<double, double>> test_cases = {{
      /*sizes=*/{2, 4},
      /*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
      /*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
  }};

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_FLOATHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, HardcodeFloatConvertInt) {
  // Hardcoded input and output generated from core PyTorch.
  // clang-format off
  std::vector<float> float_data = {
      -1.47900056838989257812, -4.59277725219726562500,
       2.15365791320800781250, -2.55494546890258789062,
       3.06999135017395019531,  3.27460670471191406250,
      -3.98865103721618652344, -4.81065988540649414062,
       3.67902207374572753906,  3.72226405143737792969,
       0.80567771196365356445,  2.23788332939147949219,
      -0.52035576105117797852, -1.58493483066558837891,
      -0.30919688940048217773};

  std::vector<double> double_data = {
      -1.47900053955270172068, -4.59277735274143061872,
       2.15365796963871947156, -2.55494554556038755422,
       3.06999137834642255029,  3.27460679459944969949,
      -3.98865109243288795682, -4.81065977167646074975,
       3.67902198302105531980,  3.72226414774102742911,
       0.80567768667100203572,  2.23788335717029518435,
      -0.52035578832931150828, -1.58493480710766210251,
      -0.30919688936285893988};
  // clang-format on

  std::vector<exec_aten::Half> half_data;
  std::vector<exec_aten::BFloat16> bf16_data;
  for (auto d : double_data) {
    half_data.emplace_back(d);
    bf16_data.emplace_back(d);
  }

  std::vector<int64_t> int64_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int32_t> int32_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int16_t> int16_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int8_t> int8_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};

  // Gather all floating-point data together for easier traversal.
  FloatingTypeToDataMap floating_point_data;
  floating_point_data[typeid(float)] = float_data;
  floating_point_data[typeid(double)] = double_data;
  floating_point_data[typeid(exec_aten::Half)] = half_data;
  floating_point_data[typeid(exec_aten::BFloat16)] = bf16_data;

  // Gather all int data together for easier traversal.
  IntTypeToDataMap int_data;
  int_data[typeid(int64_t)] = int64_data;
  int_data[typeid(int32_t)] = int32_data;
  int_data[typeid(int16_t)] = int16_data;
  int_data[typeid(int8_t)] = int8_data;

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_hardcode_data<                                              \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
}

TEST_F(OpToTest, MismatchedSizesDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle mismatched sizes";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros({3, 2, 1, 1});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

// Only MemoryFormat::Contiguous is supported; any other memory format should
// be rejected. The kernel is expected to fail when given an illegal memory
// format.
TEST_F(OpToTest, MismatchedMemoryFormatDies) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non-contiguous memory formats";
  }
  TensorFactory<ScalarType::Float> tf_in;
  TensorFactory<ScalarType::Float> tf_out;
  Tensor input =
      tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf_out.zeros({3, 1, 1, 2});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          static_cast<exec_aten::MemoryFormat>(55),
          out));
  // The memory format may be null.
  EXPECT_TENSOR_EQ(
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          /*memory_format=*/exec_aten::nullopt,
          out),
      input);
}

// Only blocking data transfer is supported.
TEST_F(OpToTest, MismatchedBlockingDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non-blocking data transfer";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/true,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

TEST_F(OpToTest, DynamicShapeUpperBoundSameAsExpected) {
  test_dynamic_shape(
      {2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}

TEST_F(OpToTest, DynamicShapeUpperBoundLargerThanExpected) {
  test_dynamic_shape(
      {10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}

TEST_F(OpToTest, DynamicShapeUnbound) {
  if (!torch::executor::testing::SupportedFeatures::get()->output_resize) {
    GTEST_SKIP() << "Dynamic shape unbound not supported";
  }
  test_dynamic_shape(
      {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND);
}