Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 43 additions & 170 deletions paddle/phi/api/include/compat/ATen/core/TensorAccessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,203 +18,76 @@

#pragma once

#include <torch/headeronly/core/TensorAccessor.h>

#include <c10/macros/Macros.h>
#include <c10/util/ArrayRef.h>
#include <c10/util/Exception.h>

#include <cstddef>
#include <cstdint>
#include <type_traits>

namespace at {
template <typename T>
struct DefaultPtrTraits {
typedef T* PtrType;
};

using torch::headeronly::DefaultPtrTraits;
#if defined(__CUDACC__) || defined(__HIPCC__)
using torch::headeronly::RestrictPtrTraits;
#endif

template <typename T,
size_t N,
template <typename U> class PtrTraits = DefaultPtrTraits,
typename index_t = int64_t>
class TensorAccessorBase {
public:
typedef typename PtrTraits<T>::PtrType PtrType;

C10_HOST_DEVICE TensorAccessorBase(PtrType data_,
const index_t* sizes_,
const index_t* strides_) // NOLINT
: data_(data_), sizes_(sizes_), strides_(strides_) {} // NOLINT
C10_HOST IntArrayRef sizes() const { return IntArrayRef(sizes_, N); }
C10_HOST IntArrayRef strides() const { return IntArrayRef(strides_, N); }
C10_HOST_DEVICE index_t stride(index_t i) const { return strides_[i]; }
C10_HOST_DEVICE index_t size(index_t i) const { return sizes_[i]; }
C10_HOST_DEVICE PtrType data() { return data_; }
C10_HOST_DEVICE const PtrType data() const { return data_; }

protected:
PtrType data_;
const index_t* sizes_;
const index_t* strides_;
};
using TensorAccessorBase = torch::headeronly::detail::
TensorAccessorBase<c10::IntArrayRef, T, N, PtrTraits, index_t>;

// The `TensorAccessor` is typically instantiated for CPU `Tensor`s using
// `Tensor.accessor<T, N>()`.
// For CUDA `Tensor`s, `GenericPackedTensorAccessor` is used on the host and
// only indexing on the device uses `TensorAccessor`s.
template <typename T,
size_t N,
template <typename U> class PtrTraits = DefaultPtrTraits,
typename index_t = int64_t>
class TensorAccessor : public TensorAccessorBase<T, N, PtrTraits, index_t> {
public:
typedef typename PtrTraits<T>::PtrType PtrType;

C10_HOST_DEVICE TensorAccessor(PtrType data_,
const index_t* sizes_,
const index_t* strides_)
: TensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}

C10_HOST_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](
index_t i) {
return TensorAccessor<T, N - 1, PtrTraits, index_t>(
this->data_ + this->strides_[0] * i,
this->sizes_ + 1,
this->strides_ + 1);
}

C10_HOST_DEVICE const TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](
index_t i) const {
return TensorAccessor<T, N - 1, PtrTraits, index_t>(
this->data_ + this->strides_[0] * i,
this->sizes_ + 1,
this->strides_ + 1);
using TensorAccessor = torch::headeronly::detail::
TensorAccessor<c10::IntArrayRef, T, N, PtrTraits, index_t>;

namespace detail {

template <size_t N, typename index_t>
struct IndexBoundsCheck {
explicit IndexBoundsCheck(index_t i) {
TORCH_CHECK(0 <= i && i < index_t{N},
"Index ",
i,
" is not within bounds of a tensor of dimension ",
N);
}
};

template <typename T, template <typename U> class PtrTraits, typename index_t>
class TensorAccessor<T, 1, PtrTraits, index_t>
: public TensorAccessorBase<T, 1, PtrTraits, index_t> {
public:
typedef typename PtrTraits<T>::PtrType PtrType;

C10_HOST_DEVICE TensorAccessor(PtrType data_,
const index_t* sizes_,
const index_t* strides_)
: TensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
C10_HOST_DEVICE T& operator[](index_t i) {
return this->data_[this->strides_[0] * i];
}
C10_HOST_DEVICE const T& operator[](index_t i) const {
return this->data_[this->strides_[0] * i];
}
};
} // namespace detail

// GenericPackedTensorAccessorBase and GenericPackedTensorAccessor are used
// for CUDA `Tensor`s on the host. In contrast to `TensorAccessor`s, they
// copy the strides and sizes on instantiation (on the host) in order to
// transfer them to the device when calling kernels. On the device, indexing of
// multidimensional tensors delegates to `TensorAccessor`s. Use RestrictPtrTraits as
// PtrTraits if you want the tensor's data pointer to be marked as __restrict__.
// Instantiation from data, sizes, strides is only needed on the host and
// std::copy isn't available on the device, so those functions are host only.
template <typename T,
size_t N,
template <typename U> class PtrTraits = DefaultPtrTraits,
typename index_t = int64_t>
class GenericPackedTensorAccessorBase {
public:
typedef typename PtrTraits<T>::PtrType PtrType;
C10_HOST GenericPackedTensorAccessorBase(PtrType data_arg,
const index_t* sizes_arg,
const index_t* strides_arg)
: data_(data_arg) {
std::copy(sizes_arg, sizes_arg + N, std::begin(this->sizes_));
std::copy(strides_arg, strides_arg + N, std::begin(this->strides_));
}

// if index_t is not int64_t, we want to have an int64_t constructor
template <typename source_index_t,
class = std::enable_if_t<std::is_same_v<source_index_t, int64_t>>>
C10_HOST GenericPackedTensorAccessorBase(PtrType data_arg,
const source_index_t* sizes_arg,
const source_index_t* strides_arg)
: data_(data_arg) {
for (size_t i = 0; i < N; ++i) {
this->sizes_[i] = sizes_arg[i];
this->strides_[i] = strides_arg[i];
}
}

C10_HOST_DEVICE index_t stride(index_t i) const { return strides_[i]; }
C10_HOST_DEVICE index_t size(index_t i) const { return sizes_[i]; }
C10_HOST_DEVICE PtrType data() { return data_; }
C10_HOST_DEVICE const PtrType data() const { return data_; }

protected:
PtrType data_;
// NOLINTNEXTLINE
index_t sizes_[N];
// NOLINTNEXTLINE
index_t strides_[N];
C10_HOST void bounds_check_(index_t i) const {
TORCH_CHECK_INDEX(0 <= i && i < index_t{N},
"Index ",
i,
" is not within bounds of a tensor of dimension ",
N);
}
};
using GenericPackedTensorAccessorBase =
torch::headeronly::detail::GenericPackedTensorAccessorBase<
detail::IndexBoundsCheck<N, index_t>,
T,
N,
PtrTraits,
index_t>;

template <typename T,
size_t N,
template <typename U> class PtrTraits = DefaultPtrTraits,
typename index_t = int64_t>
class GenericPackedTensorAccessor
: public GenericPackedTensorAccessorBase<T, N, PtrTraits, index_t> {
public:
typedef typename PtrTraits<T>::PtrType PtrType;

C10_HOST GenericPackedTensorAccessor(PtrType data_,
const index_t* sizes_,
const index_t* strides_)
: GenericPackedTensorAccessorBase<T, N, PtrTraits, index_t>(
data_, sizes_, strides_) {}

// if index_t is not int64_t, we want to have an int64_t constructor
template <typename source_index_t,
class = std::enable_if_t<std::is_same_v<source_index_t, int64_t>>>
C10_HOST GenericPackedTensorAccessor(PtrType data_,
const source_index_t* sizes_,
const source_index_t* strides_)
: GenericPackedTensorAccessorBase<T, N, PtrTraits, index_t>(
data_, sizes_, strides_) {}

C10_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](
index_t i) {
index_t* new_sizes = this->sizes_ + 1;
index_t* new_strides = this->strides_ + 1;
return TensorAccessor<T, N - 1, PtrTraits, index_t>(
this->data_ + this->strides_[0] * i, new_sizes, new_strides);
}

C10_DEVICE const TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](
index_t i) const {
const index_t* new_sizes = this->sizes_ + 1;
const index_t* new_strides = this->strides_ + 1;
return TensorAccessor<T, N - 1, PtrTraits, index_t>(
this->data_ + this->strides_[0] * i, new_sizes, new_strides);
}

/// Returns a PackedTensorAccessor of the same dimension after transposing the
/// two dimensions given. Does not actually move elements; transposition is
/// made by permuting the size/stride arrays. If the dimensions are not valid,
/// asserts.
C10_HOST GenericPackedTensorAccessor<T, N, PtrTraits, index_t> transpose(
index_t dim1, index_t dim2) const {
this->bounds_check_(dim1);
this->bounds_check_(dim2);
GenericPackedTensorAccessor<T, N, PtrTraits, index_t> result(
this->data_, this->sizes_, this->strides_);
std::swap(result.strides_[dim1], result.strides_[dim2]);
std::swap(result.sizes_[dim1], result.sizes_[dim2]);
return result;
}
};
using GenericPackedTensorAccessor =
torch::headeronly::detail::GenericPackedTensorAccessor<
TensorAccessor<T, N - 1, PtrTraits, index_t>,
detail::IndexBoundsCheck<N, index_t>,
T,
N,
PtrTraits,
index_t>;

template <typename T,
size_t N,
Expand Down
51 changes: 4 additions & 47 deletions paddle/phi/api/include/compat/c10/core/DeviceType.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,16 @@

#pragma once

#include <cstdint>
#include <functional>
// If you modified DeviceType in this file, please also sync your changes into
// torch/headeronly/core/DeviceType.h.
#include <torch/headeronly/core/DeviceType.h>

Comment on lines +17 to +20
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

c10/core/DeviceType.h previously re-exported c10::{DeviceType,kCUDA,kCPU,...} into namespace torch, and this PR removes those aliases. torch/all.h brings in c10/core/DeviceType.h via c10/core/Device.h, and code/tests (e.g. test/cpp/compat/ATen_basic_test.cc) use torch::kCUDA; this will stop compiling. Please restore the namespace torch { using c10::DeviceType; using c10::kCUDA; ... } re-exports (either here or in the included headeronly DeviceType header) to maintain the existing API surface.

Copilot uses AI. Check for mistakes.
#include <ostream>

#include "paddle/phi/common/place.h"

namespace c10 {

enum class DeviceType : int8_t {
CPU = 0,
CUDA = 1,
XPU = 12,
IPU = 18,
CUSTOM = 20,
PrivateUse1 = CUSTOM,
};

constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUSTOM = DeviceType::CUSTOM;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kIPU = DeviceType::IPU;
constexpr DeviceType kPrivateUse1 = DeviceType::PrivateUse1;

inline phi::AllocationType DeviceTypeToPhi(DeviceType d) {
switch (d) {
case DeviceType::CPU:
Expand Down Expand Up @@ -106,32 +92,3 @@ inline std::ostream& operator<<(std::ostream& os, DeviceType d) {
}

} // namespace c10

namespace std {
template <>
struct hash<c10::DeviceType> {
std::size_t operator()(c10::DeviceType k) const noexcept {
return std::hash<int>()(static_cast<int>(k));
}
};
} // namespace std

namespace at {
using c10::DeviceType;
using c10::kCPU;
using c10::kCUDA;
using c10::kCUSTOM;
using c10::kIPU;
using c10::kPrivateUse1;
using c10::kXPU;
} // namespace at

namespace torch {
using c10::DeviceType;
using c10::kCPU;
using c10::kCUDA;
using c10::kCUSTOM;
using c10::kIPU;
using c10::kPrivateUse1;
using c10::kXPU;
} // namespace torch
Loading
Loading