|
Highly Efficient FFT for Exascale: HeFFTe v2.4
|
Specialization for the data operations in CUDA mode. More...
#include <heffte_backend_cuda.h>
Public Types | |
| using | stream_type = cudaStream_t |
| The stream type for the device. | |
| using | backend_device = backend::device_instance<tag::gpu> |
| Defines the backend_device. | |
| using | stream_type = sycl::queue& |
| The stream type for the device. | |
| using | backend_device = backend::device_instance<tag::gpu> |
| Defines the backend_device. | |
| using | stream_type = hipStream_t |
| The stream type for the device. | |
| using | backend_device = backend::device_instance<tag::gpu> |
| Defines the backend_device. | |
Static Public Member Functions | |
| template<typename scalar_type > | |
| static scalar_type * | allocate (cudaStream_t, size_t num_entries) |
| Allocate memory. | |
| template<typename scalar_type > | |
| static void | free (cudaStream_t, scalar_type *pntr) |
| Free memory. | |
| template<typename scalar_type > | |
| static void | copy_n (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Equivalent to std::copy_n() but using CUDA arrays. | |
| template<typename scalar_type > | |
| static void | copy_n (cudaStream_t stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
| Copy-convert complex-to-real. | |
| template<typename scalar_type > | |
| static void | copy_n (cudaStream_t stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
| Copy-convert real-to-complex. | |
| template<typename scalar_type > | |
| static void | copy_device_to_host (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the host. | |
| template<typename scalar_type > | |
| static void | copy_device_to_device (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the device. | |
| template<typename scalar_type > | |
| static void | copy_host_to_device (cudaStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the host to the device. | |
| template<typename scalar_type > | |
| static scalar_type * | allocate (sycl::queue &stream, size_t num_entries) |
| Allocate memory. | |
| template<typename scalar_type > | |
| static void | free (sycl::queue &stream, scalar_type *pntr) |
| Free memory. | |
| template<typename scalar_type > | |
| static void | copy_n (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Equivalent to std::copy_n() but using SYCL device arrays. | |
| template<typename scalar_type > | |
| static void | copy_n (sycl::queue &stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
| Copy-convert complex-to-real. | |
| template<typename scalar_type > | |
| static void | copy_n (sycl::queue &stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
| Copy-convert real-to-complex. | |
| template<typename scalar_type > | |
| static void | copy_device_to_host (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the host. | |
| template<typename scalar_type > | |
| static void | copy_device_to_device (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the device. | |
| template<typename scalar_type > | |
| static void | copy_host_to_device (sycl::queue &stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the host to the device. | |
| template<typename scalar_type > | |
| static scalar_type * | allocate (hipStream_t, size_t num_entries) |
| Allocate memory. | |
| template<typename scalar_type > | |
| static void | free (hipStream_t, scalar_type *pntr) |
| Free memory. | |
| template<typename scalar_type > | |
| static void | copy_n (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Equivalent to std::copy_n() but using HIP (ROCm) device arrays. | |
| template<typename scalar_type > | |
| static void | copy_n (hipStream_t stream, std::complex< scalar_type > const source[], size_t num_entries, scalar_type destination[]) |
| Copy-convert complex-to-real. | |
| template<typename scalar_type > | |
| static void | copy_n (hipStream_t stream, scalar_type const source[], size_t num_entries, std::complex< scalar_type > destination[]) |
| Copy-convert real-to-complex. | |
| template<typename scalar_type > | |
| static void | copy_device_to_host (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the host. | |
| template<typename scalar_type > | |
| static void | copy_device_to_device (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the device to the device. | |
| template<typename scalar_type > | |
| static void | copy_host_to_device (hipStream_t stream, scalar_type const source[], size_t num_entries, scalar_type destination[]) |
| Copy the data from the host to the device. | |
Specialization for the data operations in CUDA mode.
Specialization for the data operations in ROCm mode.