From 4af507845a439c0d8395cb10476fdbe1317d6c3c Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Tue, 23 Dec 2025 17:07:52 -0800
Subject: [PATCH] [slimtensor] Add SlimTensor class with basic properties and CPU copy operation

**Key components:**

1. **`c10/core/Contiguity.h`** - Contiguity checking utility:
   - `_compute_contiguous()` - computes whether a tensor with given sizes/strides is contiguous in memory (row-major order)

2. **`core/SlimTensor.h`** - Main SlimTensor class with:
   - **Constructors**: Default (undefined tensor) and full constructor with storage, sizes, strides, dtype, and storage_offset
   - **Property accessors**:
     - `sizes()`, `size(dim)` - get tensor dimensions with negative indexing support
     - `strides()`, `stride(dim)` - get tensor strides with negative indexing support
     - `dtype()`, `device()`, `device_type()`, `device_index()`
     - `numel()`, `dim()`, `nbytes()`, `itemsize()`
     - `data_ptr()` - returns pointer to tensor data (adjusted for storage_offset)
     - `storage_offset()`, `storage()`
   - **State queries**: `defined()`, `is_cpu()`, `is_contiguous()`, `is_empty()`
   - **Copy operation**: `copy_(other)` - copies data from another tensor
     - Fast path: uses memcpy when both tensors are contiguous
     - Slow path: element-wise copy respecting strides for non-contiguous tensors
   - **Setters**: `reset()`, `set_storage()`, `set_sizes_and_strides()`

**Current constraints:**
- Only CPU device supported
- Only Float32 dtype tested
- copy_() only supports CPU-to-CPU copy

These constraints will be addressed in follow-up diffs.

Differential Revision: [D89750150](https://our.internmc.facebook.com/intern/diff/D89750150/)

[ghstack-poisoned]
---
 backends/aoti/slim/c10/core/Contiguity.h     |  54 +++
 backends/aoti/slim/c10/core/targets.bzl      |  13 +
 backends/aoti/slim/core/SlimTensor.h         | 365 ++++++++++++++++++
 backends/aoti/slim/core/targets.bzl          |  19 +
 backends/aoti/slim/core/test/targets.bzl     |  22 ++
 .../slim/core/test/test_slimtensor_basic.cpp | 334 ++++++++++++++++
 .../slim/core/test/test_slimtensor_copy.cpp  | 259 +++++++++++++
 7 files changed, 1066 insertions(+)
 create mode 100644 backends/aoti/slim/c10/core/Contiguity.h
 create mode 100644 backends/aoti/slim/core/SlimTensor.h
 create mode 100644 backends/aoti/slim/core/test/test_slimtensor_basic.cpp
 create mode 100644 backends/aoti/slim/core/test/test_slimtensor_copy.cpp

diff --git a/backends/aoti/slim/c10/core/Contiguity.h b/backends/aoti/slim/c10/core/Contiguity.h
new file mode 100644
index 00000000000..e3bcfb24341
--- /dev/null
+++ b/backends/aoti/slim/c10/core/Contiguity.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include
+
+#include
+
+namespace executorch::backends::aoti::slim::c10 {
+
+using ::executorch::runtime::ArrayRef;
+
+/**
+ * Compute whether a tensor with given sizes, strides, and numel is contiguous.
+ *
+ * A tensor is contiguous if its elements are laid out in memory in row-major
+ * order, i.e., the stride of the last dimension is 1, and each preceding
+ * dimension's stride equals the product of all following dimensions' sizes.
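+ *
+ * For example, a tensor with sizes [2, 3, 4] and strides [12, 4, 1] is
+ * contiguous: the last stride is 1, the middle stride (4) equals the last
+ * size (4), and the first stride (12) equals 3 * 4.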
+ *
+ * @param sizes The sizes of each dimension
+ * @param strides The strides of each dimension
+ * @param numel The total number of elements
+ * @return true if the tensor is contiguous, false otherwise
+ */
+template <typename T>
+bool _compute_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
+  if (numel == 0) {
+    return true;
+  }
+
+  T expected_stride = 1;
+  // Iterate from last dimension to first
+  for (int64_t d = static_cast<int64_t>(sizes.size()) - 1; d >= 0; d--) {
+    const auto& size_d = sizes[d];
+    if (size_d == 1) {
+      // Size-1 dimensions don't affect contiguity
+      continue;
+    }
+
+    if (strides[d] != expected_stride) {
+      return false;
+    }
+    expected_stride *= size_d;
+  }
+  return true;
+}
+
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/targets.bzl b/backends/aoti/slim/c10/core/targets.bzl
index c421081f095..500620aecd1 100644
--- a/backends/aoti/slim/c10/core/targets.bzl
+++ b/backends/aoti/slim/c10/core/targets.bzl
@@ -54,11 +54,24 @@ def define_common_targets():
         ],
     )
 
+    # Header-only library for Contiguity
+    runtime.cxx_library(
+        name = "contiguity",
+        headers = [
+            "Contiguity.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/runtime/core:core",
+        ],
+    )
+
     # Combined c10 core library
     runtime.cxx_library(
         name = "core",
         visibility = ["@EXECUTORCH_CLIENTS"],
         exported_deps = [
+            ":contiguity",
             ":device",
             ":device_type",
             ":scalar_type",
diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/SlimTensor.h
new file mode 100644
index 00000000000..f3ab9f3fec3
--- /dev/null
+++ b/backends/aoti/slim/core/SlimTensor.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace executorch::backends::aoti::slim {
+
+/**
+ * SlimTensor - a lightweight tensor class for the AOTI-driven ExecuTorch
+ * backend runtime.
+ */
+class SlimTensor {
+ public:
+  /**
+   * Construct a SlimTensor with the given storage, sizes, strides, and dtype.
+   *
+   * @param storage The underlying storage
+   * @param sizes The sizes of each dimension
+   * @param strides The strides of each dimension
+   * @param dtype The scalar type of tensor elements
+   * @param storage_offset Offset into storage in number of elements
+   */
+  SlimTensor(
+      Storage&& storage,
+      IntArrayRef sizes,
+      IntArrayRef strides,
+      c10::ScalarType dtype,
+      int64_t storage_offset = 0)
+      : storage_(std::move(storage)),
+        storage_offset_(storage_offset),
+        dtype_(dtype) {
+    set_sizes_and_strides(sizes, strides);
+  }
+
+  /**
+   * Default constructor - creates an undefined tensor.
+   */
+  SlimTensor()
+      : storage_(Storage()),
+        storage_offset_(0),
+        numel_(0),
+        dtype_(c10::ScalarType::Float),
+        is_contiguous_(true) {
+    sizes_and_strides_.set_sizes({0});
+    sizes_and_strides_.set_strides({1});
+  }
+
+  // Default copy/move operations
+  SlimTensor(const SlimTensor&) = default;
+  SlimTensor& operator=(const SlimTensor&) = default;
+  SlimTensor(SlimTensor&&) = default;
+  SlimTensor& operator=(SlimTensor&&) = default;
+  ~SlimTensor() = default;
+
+  /**
+   * Reset the tensor, releasing the storage reference.
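+   * After reset() the tensor is undefined, i.e. defined() returns false.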
+ */ + void reset() { + storage_.reset(); + } + + // ========================================================================= + // Property Accessors + // ========================================================================= + + /** + * Get the underlying storage. + */ + Storage storage() const { + return storage_; + } + + /** + * Get the total number of bytes for this tensor's data. + */ + size_t nbytes() const { + return numel() * itemsize(); + } + + /** + * Get the size of a single element in bytes. + */ + size_t itemsize() const { + return c10::elementSize(dtype_); + } + + /** + * Get the sizes of all dimensions. + */ + IntArrayRef sizes() const { + return sizes_and_strides_.sizes_arrayref(); + } + + /** + * Get the size of a specific dimension. + */ + int64_t size(int64_t dim) const { + int64_t ndim = static_cast(this->dim()); + ET_CHECK_MSG( + dim >= -ndim && dim < ndim, + "Dimension out of range (expected to be in range of [%ld, %ld], but got %ld)", + -ndim, + ndim - 1, + dim); + if (dim < 0) { + dim += ndim; + } + return sizes_and_strides_.size_at(static_cast(dim)); + } + + /** + * Get the strides of all dimensions. + */ + IntArrayRef strides() const { + return sizes_and_strides_.strides_arrayref(); + } + + /** + * Get the stride of a specific dimension. + */ + int64_t stride(int64_t dim) const { + int64_t ndim = static_cast(this->dim()); + ET_CHECK_MSG( + dim >= -ndim && dim < ndim, + "Dimension out of range (expected to be in range of [%ld, %ld], but got %ld)", + -ndim, + ndim - 1, + dim); + if (dim < 0) { + dim += ndim; + } + return sizes_and_strides_.stride_at(static_cast(dim)); + } + + /** + * Get the scalar type of tensor elements. + */ + c10::ScalarType dtype() const { + return dtype_; + } + + /** + * Get the device where the tensor data resides. + */ + const c10::Device& device() const { + return storage_->device(); + } + + /** + * Get the device type. + */ + c10::DeviceType device_type() const { + return storage_->device().type(); + } + + /** + * Get the device index. + */ + c10::DeviceIndex device_index() const { + return storage_->device().index(); + } + + /** + * Get the storage offset in number of elements. + */ + int64_t storage_offset() const { + return storage_offset_; + } + + /** + * Get the total number of elements. + */ + size_t numel() const { + return numel_; + } + + /** + * Get the number of dimensions. + */ + size_t dim() const { + return sizes_and_strides_.size(); + } + + /** + * Get a pointer to the tensor data, adjusted for storage offset. + */ + void* data_ptr() const { + return static_cast(storage_->data()) + storage_offset_ * itemsize(); + } + + /** + * Check if the tensor is contiguous in memory (row-major order). + */ + bool is_contiguous() const { + return is_contiguous_; + } + + /** + * Check if the tensor has zero elements. + */ + bool is_empty() const { + return numel_ == 0; + } + + /** + * Check if the tensor is on CPU. + */ + bool is_cpu() const { + return device().is_cpu(); + } + + /** + * Check if the tensor is defined (has valid storage). + */ + bool defined() const { + return storage_.get() != nullptr; + } + + // ========================================================================= + // Setters + // ========================================================================= + + /** + * Set the underlying storage. + */ + void set_storage(Storage&& new_storage) { + storage_ = std::move(new_storage); + } + + /** + * Set sizes and strides together. 
+ */ + void set_sizes_and_strides(IntArrayRef sizes, IntArrayRef strides) { + ET_CHECK_MSG( + sizes.size() == strides.size(), + "sizes (%zu) and strides (%zu) must have the same length", + sizes.size(), + strides.size()); + + sizes_and_strides_.set_sizes(sizes); + sizes_and_strides_.set_strides(strides); + + refresh_numel(); + refresh_contiguous(); + } + + // ========================================================================= + // Copy Operation + // ========================================================================= + + /** + * Copy data from another tensor to this tensor. + * + * Both tensors must have the same numel and dtype. + * Currently only supports CPU-to-CPU copy (contiguous tensors only). + * + * @param other The source tensor to copy from + * @return Reference to this tensor + */ + SlimTensor& copy_(const SlimTensor& other) { + ET_CHECK_MSG( + this->numel() == other.numel(), + "copy_: numel mismatch (dst=%zu, src=%zu)", + this->numel(), + other.numel()); + ET_CHECK_MSG(this->dtype() == other.dtype(), "copy_: dtype mismatch"); + + if (this->numel() == 0) { + return *this; + } + + // Current we only support CPU-only tensors + // TODO(gasoonjia): support other device types. + ET_CHECK_MSG( + this->is_cpu() && other.is_cpu(), "copy_: only CPU tensors supported"); + + if (this->is_contiguous() && other.is_contiguous()) { + // Fast path: both tensors are contiguous, use memcpy + std::memcpy(this->data_ptr(), other.data_ptr(), other.nbytes()); + } else { + // Slow path: element-wise copy for non-contiguous tensors + copy_strided_(other); + } + + return *this; + } + + private: + /** + * Element-wise copy for non-contiguous tensors. + */ + void copy_strided_(const SlimTensor& other) { + const size_t elem_size = c10::elementSize(dtype_); + char* dst_data = static_cast(this->data_ptr()); + const char* src_data = static_cast(other.data_ptr()); + + std::vector counter(this->dim(), 0); + for (size_t i = 0; i < this->numel(); i++) { + // Compute source offset + int64_t src_offset = 0; + for (size_t d = 0; d < other.dim(); d++) { + src_offset += counter[d] * other.stride(static_cast(d)); + } + + // Compute destination offset + int64_t dst_offset = 0; + for (size_t d = 0; d < this->dim(); d++) { + dst_offset += counter[d] * this->stride(static_cast(d)); + } + + // Copy single element + std::memcpy( + dst_data + dst_offset * static_cast(elem_size), + src_data + src_offset * static_cast(elem_size), + elem_size); + + // Increment multi-dimensional counter + for (int64_t d = static_cast(this->dim()) - 1; d >= 0; --d) { + counter[d]++; + if (counter[d] < this->size(d)) { + break; + } + counter[d] = 0; + } + } + } + + void refresh_numel() { + numel_ = compute_numel(sizes_and_strides_.sizes_arrayref()); + } + + void refresh_contiguous() { + is_contiguous_ = c10::_compute_contiguous( + sizes_and_strides_.sizes_arrayref(), + sizes_and_strides_.strides_arrayref(), + static_cast(numel_)); + } + + Storage storage_; + int64_t storage_offset_{0}; + c10::SizesAndStrides sizes_and_strides_; + size_t numel_{1}; + c10::ScalarType dtype_; + bool is_contiguous_{true}; +}; + +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/core/targets.bzl b/backends/aoti/slim/core/targets.bzl index 12de67bf8b1..8c352b74c28 100644 --- a/backends/aoti/slim/core/targets.bzl +++ b/backends/aoti/slim/core/targets.bzl @@ -17,3 +17,22 @@ def define_common_targets(): "//executorch/runtime/platform:platform", ], ) + + # Header-only library for SlimTensor + runtime.cxx_library( + name = 
"slimtensor", + headers = [ + "SlimTensor.h", + ], + visibility = ["@EXECUTORCH_CLIENTS"], + exported_deps = [ + ":storage", + "//executorch/backends/aoti/slim/c10/core:contiguity", + "//executorch/backends/aoti/slim/c10/core:device", + "//executorch/backends/aoti/slim/c10/core:scalar_type", + "//executorch/backends/aoti/slim/c10/core:sizes_and_strides", + "//executorch/backends/aoti/slim/util:array_ref_util", + "//executorch/backends/aoti/slim/util:size_util", + "//executorch/runtime/platform:platform", + ], + ) diff --git a/backends/aoti/slim/core/test/targets.bzl b/backends/aoti/slim/core/test/targets.bzl index 1bc6029bd2d..4d7ec4b0fbf 100644 --- a/backends/aoti/slim/core/test/targets.bzl +++ b/backends/aoti/slim/core/test/targets.bzl @@ -12,3 +12,25 @@ def define_common_targets(): "//executorch/backends/aoti/slim/core:storage", ], ) + + runtime.cxx_test( + name = "test_slimtensor_basic", + srcs = [ + "test_slimtensor_basic.cpp", + ], + deps = [ + "//executorch/backends/aoti/slim/core:slimtensor", + "//executorch/backends/aoti/slim/core:storage", + ], + ) + + runtime.cxx_test( + name = "test_slimtensor_copy", + srcs = [ + "test_slimtensor_copy.cpp", + ], + deps = [ + "//executorch/backends/aoti/slim/core:slimtensor", + "//executorch/backends/aoti/slim/core:storage", + ], + ) diff --git a/backends/aoti/slim/core/test/test_slimtensor_basic.cpp b/backends/aoti/slim/core/test/test_slimtensor_basic.cpp new file mode 100644 index 00000000000..d19a8678725 --- /dev/null +++ b/backends/aoti/slim/core/test/test_slimtensor_basic.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include + +namespace executorch::backends::aoti::slim { + +// Helper function to create a CPU storage with given size +Storage make_cpu_storage(size_t nbytes) { + return Storage(new MaybeOwningStorage(CPU_DEVICE, nbytes)); +} + +// Helper function to create a simple 2x3 float tensor +SlimTensor make_2x3_tensor() { + std::vector sizes = {2, 3}; + std::vector strides = {3, 1}; + size_t nbytes = 6 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + return SlimTensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); +} + +// ============================================================================= +// Constructor Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, DefaultConstructor) { + SlimTensor tensor; + + EXPECT_FALSE(tensor.defined()); + EXPECT_EQ(tensor.numel(), 0u); + EXPECT_EQ(tensor.dtype(), c10::ScalarType::Float); + EXPECT_TRUE(tensor.is_contiguous()); +} + +TEST(SlimTensorBasicTest, ConstructWithStorage) { + std::vector sizes = {2, 3, 4}; + std::vector strides = {12, 4, 1}; + size_t nbytes = 24 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_TRUE(tensor.defined()); + EXPECT_EQ(tensor.dim(), 3u); + EXPECT_EQ(tensor.numel(), 24u); + EXPECT_TRUE(tensor.is_cpu()); + EXPECT_TRUE(tensor.is_contiguous()); +} + +TEST(SlimTensorBasicTest, ConstructWithStorageOffset) { + std::vector sizes = {2, 3}; + std::vector strides = {3, 1}; + size_t nbytes = 100 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float, + 10); + + EXPECT_EQ(tensor.storage_offset(), 10); +} + +// ============================================================================= +// Property Accessor Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, Sizes) { + SlimTensor tensor = make_2x3_tensor(); + + auto sizes = tensor.sizes(); + EXPECT_EQ(sizes.size(), 2u); + EXPECT_EQ(sizes[0], 2); + EXPECT_EQ(sizes[1], 3); +} + +TEST(SlimTensorBasicTest, SizeAtDim) { + SlimTensor tensor = make_2x3_tensor(); + + EXPECT_EQ(tensor.size(0), 2); + EXPECT_EQ(tensor.size(1), 3); + EXPECT_EQ(tensor.size(-1), 3); + EXPECT_EQ(tensor.size(-2), 2); +} + +TEST(SlimTensorBasicTest, Strides) { + SlimTensor tensor = make_2x3_tensor(); + + auto strides = tensor.strides(); + EXPECT_EQ(strides.size(), 2u); + EXPECT_EQ(strides[0], 3); + EXPECT_EQ(strides[1], 1); +} + +TEST(SlimTensorBasicTest, StrideAtDim) { + SlimTensor tensor = make_2x3_tensor(); + + EXPECT_EQ(tensor.stride(0), 3); + EXPECT_EQ(tensor.stride(1), 1); + EXPECT_EQ(tensor.stride(-1), 1); + EXPECT_EQ(tensor.stride(-2), 3); +} + +TEST(SlimTensorBasicTest, Dtype) { + SlimTensor tensor = make_2x3_tensor(); + + EXPECT_EQ(tensor.dtype(), c10::ScalarType::Float); + EXPECT_EQ(tensor.itemsize(), sizeof(float)); +} + +TEST(SlimTensorBasicTest, Device) { + SlimTensor tensor = make_2x3_tensor(); + + EXPECT_TRUE(tensor.is_cpu()); + EXPECT_EQ(tensor.device_type(), c10::DeviceType::CPU); + EXPECT_EQ(tensor.device_index(), 0); +} + +TEST(SlimTensorBasicTest, Numel) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_EQ(tensor.numel(), 6u); +} + +TEST(SlimTensorBasicTest, Dim) { + SlimTensor tensor = make_2x3_tensor(); + 
EXPECT_EQ(tensor.dim(), 2u); +} + +TEST(SlimTensorBasicTest, Nbytes) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_EQ(tensor.nbytes(), 6 * sizeof(float)); +} + +TEST(SlimTensorBasicTest, Itemsize) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_EQ(tensor.itemsize(), sizeof(float)); +} + +TEST(SlimTensorBasicTest, DataPtr) { + SlimTensor tensor = make_2x3_tensor(); + void* data = tensor.data_ptr(); + EXPECT_NE(data, nullptr); +} + +TEST(SlimTensorBasicTest, DataPtrWithOffset) { + std::vector sizes = {2, 3}; + std::vector strides = {3, 1}; + size_t nbytes = 100 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + void* base = storage->data(); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float, + 5); + + void* data = tensor.data_ptr(); + EXPECT_EQ(data, static_cast(base) + 5 * sizeof(float)); +} + +TEST(SlimTensorBasicTest, StorageOffset) { + std::vector sizes = {2, 3}; + std::vector strides = {3, 1}; + size_t nbytes = 100 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float, + 42); + + EXPECT_EQ(tensor.storage_offset(), 42); +} + +// ============================================================================= +// Contiguity Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, IsContiguousTrue) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_TRUE(tensor.is_contiguous()); +} + +TEST(SlimTensorBasicTest, IsContiguousFalseTransposed) { + std::vector sizes = {3, 2}; + std::vector strides = {1, 3}; + size_t nbytes = 6 * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_FALSE(tensor.is_contiguous()); +} + +TEST(SlimTensorBasicTest, IsContiguousEmptyTensor) { + std::vector sizes = {0, 3}; + std::vector strides = {3, 1}; + size_t nbytes = 0; + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_TRUE(tensor.is_contiguous()); + EXPECT_TRUE(tensor.is_empty()); +} + +// ============================================================================= +// State Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, Defined) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_TRUE(tensor.defined()); +} + +TEST(SlimTensorBasicTest, NotDefined) { + SlimTensor tensor; + EXPECT_FALSE(tensor.defined()); +} + +TEST(SlimTensorBasicTest, IsEmpty) { + std::vector sizes = {0}; + std::vector strides = {1}; + Storage storage = make_cpu_storage(0); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_TRUE(tensor.is_empty()); + EXPECT_EQ(tensor.numel(), 0u); +} + +TEST(SlimTensorBasicTest, Reset) { + SlimTensor tensor = make_2x3_tensor(); + EXPECT_TRUE(tensor.defined()); + + tensor.reset(); + EXPECT_FALSE(tensor.defined()); +} + +// ============================================================================= +// Copy/Move Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, CopyConstructor) { + SlimTensor original = make_2x3_tensor(); + SlimTensor copy = original; + + 
EXPECT_TRUE(copy.defined()); + EXPECT_EQ(copy.dim(), 2u); + EXPECT_EQ(copy.numel(), 6u); + EXPECT_EQ(copy.dtype(), c10::ScalarType::Float); +} + +TEST(SlimTensorBasicTest, MoveConstructor) { + SlimTensor original = make_2x3_tensor(); + SlimTensor moved = std::move(original); + + EXPECT_TRUE(moved.defined()); + EXPECT_EQ(moved.dim(), 2u); + EXPECT_EQ(moved.numel(), 6u); +} + +// ============================================================================= +// Multi-dimensional Tests +// ============================================================================= + +TEST(SlimTensorBasicTest, OneDimensional) { + std::vector sizes = {10}; + std::vector strides = {1}; + Storage storage = make_cpu_storage(10 * sizeof(float)); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_EQ(tensor.dim(), 1u); + EXPECT_EQ(tensor.size(0), 10); + EXPECT_EQ(tensor.stride(0), 1); + EXPECT_TRUE(tensor.is_contiguous()); +} + +TEST(SlimTensorBasicTest, FourDimensional) { + std::vector sizes = {2, 3, 4, 5}; + std::vector strides = {60, 20, 5, 1}; + Storage storage = make_cpu_storage(120 * sizeof(float)); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + EXPECT_EQ(tensor.dim(), 4u); + EXPECT_EQ(tensor.numel(), 120u); + EXPECT_TRUE(tensor.is_contiguous()); +} + +} // namespace executorch::backends::aoti::slim diff --git a/backends/aoti/slim/core/test/test_slimtensor_copy.cpp b/backends/aoti/slim/core/test/test_slimtensor_copy.cpp new file mode 100644 index 00000000000..a0adb083808 --- /dev/null +++ b/backends/aoti/slim/core/test/test_slimtensor_copy.cpp @@ -0,0 +1,259 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include + +namespace executorch::backends::aoti::slim { + +// Helper function to create a CPU storage with given size +Storage make_cpu_storage(size_t nbytes) { + return Storage(new MaybeOwningStorage(CPU_DEVICE, nbytes)); +} + +// Helper function to create a contiguous float tensor and fill with values +SlimTensor make_filled_tensor( + std::vector sizes, + std::vector strides, + const std::vector& values) { + size_t numel = 1; + for (auto s : sizes) { + numel *= static_cast(s); + } + size_t nbytes = numel * sizeof(float); + Storage storage = make_cpu_storage(nbytes); + + SlimTensor tensor( + std::move(storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + float* data = static_cast(tensor.data_ptr()); + for (size_t i = 0; i < values.size() && i < numel; ++i) { + data[i] = values[i]; + } + + return tensor; +} + +// ============================================================================= +// Basic Copy Tests +// ============================================================================= + +TEST(SlimTensorCopyTest, CopyContiguousTensors) { + std::vector sizes = {2, 3}; + std::vector strides = {3, 1}; + std::vector src_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + + SlimTensor src = make_filled_tensor(sizes, strides, src_values); + SlimTensor dst = + make_filled_tensor(sizes, strides, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + + dst.copy_(src); + + float* dst_data = static_cast(dst.data_ptr()); + EXPECT_FLOAT_EQ(dst_data[0], 1.0f); + EXPECT_FLOAT_EQ(dst_data[1], 2.0f); + EXPECT_FLOAT_EQ(dst_data[2], 3.0f); + EXPECT_FLOAT_EQ(dst_data[3], 4.0f); + EXPECT_FLOAT_EQ(dst_data[4], 5.0f); + EXPECT_FLOAT_EQ(dst_data[5], 6.0f); +} + +TEST(SlimTensorCopyTest, CopyOneDimensional) { + std::vector sizes = {5}; + std::vector strides = {1}; + std::vector src_values = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f}; + + SlimTensor src = make_filled_tensor(sizes, strides, src_values); + SlimTensor dst = + make_filled_tensor(sizes, strides, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + + dst.copy_(src); + + float* dst_data = static_cast(dst.data_ptr()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_FLOAT_EQ(dst_data[i], src_values[i]); + } +} + +TEST(SlimTensorCopyTest, CopyThreeDimensional) { + std::vector sizes = {2, 2, 2}; + std::vector strides = {4, 2, 1}; + std::vector src_values = { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; + + SlimTensor src = make_filled_tensor(sizes, strides, src_values); + SlimTensor dst = make_filled_tensor( + sizes, strides, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + + dst.copy_(src); + + float* dst_data = static_cast(dst.data_ptr()); + for (size_t i = 0; i < 8; ++i) { + EXPECT_FLOAT_EQ(dst_data[i], src_values[i]); + } +} + +TEST(SlimTensorCopyTest, CopyEmptyTensor) { + std::vector sizes = {0, 3}; + std::vector strides = {3, 1}; + Storage storage1 = make_cpu_storage(0); + Storage storage2 = make_cpu_storage(0); + + SlimTensor src( + std::move(storage1), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + SlimTensor dst( + std::move(storage2), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float); + + // Should not crash + dst.copy_(src); + + EXPECT_EQ(dst.numel(), 0u); +} + +TEST(SlimTensorCopyTest, CopyReturnsSelf) { + std::vector sizes = {2, 2}; + std::vector strides = {2, 1}; + std::vector values = {1.0f, 2.0f, 3.0f, 4.0f}; + + SlimTensor src = make_filled_tensor(sizes, strides, values); + SlimTensor dst = make_filled_tensor(sizes, strides, {0.0f, 0.0f, 0.0f, 0.0f}); + 
+ SlimTensor& result = dst.copy_(src); + + EXPECT_EQ(&result, &dst); +} + +// ============================================================================= +// Non-Contiguous Copy Tests +// ============================================================================= + +TEST(SlimTensorCopyTest, CopyNonContiguousSrc) { + // Source is transposed (non-contiguous) + std::vector src_sizes = {2, 3}; + std::vector src_strides = {1, 2}; + + // Allocate storage for 6 elements in transposed layout + Storage src_storage = make_cpu_storage(6 * sizeof(float)); + float* src_data = static_cast(src_storage->data()); + // Physical layout: [0,3] [1,4] [2,5] for logical [0,1,2; 3,4,5] + src_data[0] = 0.0f; + src_data[1] = 3.0f; + src_data[2] = 1.0f; + src_data[3] = 4.0f; + src_data[4] = 2.0f; + src_data[5] = 5.0f; + + SlimTensor src( + std::move(src_storage), + makeArrayRef(src_sizes), + makeArrayRef(src_strides), + c10::ScalarType::Float); + + // Destination is contiguous + std::vector dst_sizes = {2, 3}; + std::vector dst_strides = {3, 1}; + SlimTensor dst = make_filled_tensor( + dst_sizes, dst_strides, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + + dst.copy_(src); + + float* dst_data = static_cast(dst.data_ptr()); + EXPECT_FLOAT_EQ(dst_data[0], 0.0f); + EXPECT_FLOAT_EQ(dst_data[1], 1.0f); + EXPECT_FLOAT_EQ(dst_data[2], 2.0f); + EXPECT_FLOAT_EQ(dst_data[3], 3.0f); + EXPECT_FLOAT_EQ(dst_data[4], 4.0f); + EXPECT_FLOAT_EQ(dst_data[5], 5.0f); +} + +TEST(SlimTensorCopyTest, CopyNonContiguousDst) { + // Source is contiguous + std::vector src_sizes = {2, 3}; + std::vector src_strides = {3, 1}; + std::vector values = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + SlimTensor src = make_filled_tensor(src_sizes, src_strides, values); + + // Destination is transposed (non-contiguous) + std::vector dst_sizes = {2, 3}; + std::vector dst_strides = {1, 2}; + Storage dst_storage = make_cpu_storage(6 * sizeof(float)); + + SlimTensor dst( + std::move(dst_storage), + makeArrayRef(dst_sizes), + makeArrayRef(dst_strides), + c10::ScalarType::Float); + + dst.copy_(src); + + float* dst_data = static_cast(dst.storage()->data()); + // After copy, physical layout should be: [0,3] [1,4] [2,5] + EXPECT_FLOAT_EQ(dst_data[0], 0.0f); + EXPECT_FLOAT_EQ(dst_data[1], 3.0f); + EXPECT_FLOAT_EQ(dst_data[2], 1.0f); + EXPECT_FLOAT_EQ(dst_data[3], 4.0f); + EXPECT_FLOAT_EQ(dst_data[4], 2.0f); + EXPECT_FLOAT_EQ(dst_data[5], 5.0f); +} + +// ============================================================================= +// Storage Offset Tests +// ============================================================================= + +TEST(SlimTensorCopyTest, CopyWithStorageOffset) { + // Create a larger storage and use offset + std::vector sizes = {2, 2}; + std::vector strides = {2, 1}; + size_t total_nbytes = 100 * sizeof(float); + + // Source with offset + Storage src_storage = make_cpu_storage(total_nbytes); + float* src_base = static_cast(src_storage->data()); + src_base[10] = 1.0f; + src_base[11] = 2.0f; + src_base[12] = 3.0f; + src_base[13] = 4.0f; + + SlimTensor src( + std::move(src_storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float, + 10); + + // Destination with different offset + Storage dst_storage = make_cpu_storage(total_nbytes); + SlimTensor dst( + std::move(dst_storage), + makeArrayRef(sizes), + makeArrayRef(strides), + c10::ScalarType::Float, + 20); + + dst.copy_(src); + + float* dst_base = static_cast(dst.storage()->data()); + EXPECT_FLOAT_EQ(dst_base[20], 1.0f); + EXPECT_FLOAT_EQ(dst_base[21], 2.0f); + 
EXPECT_FLOAT_EQ(dst_base[22], 3.0f); + EXPECT_FLOAT_EQ(dst_base[23], 4.0f); +} + +} // namespace executorch::backends::aoti::slim