DPC++ で DFT を設定して計算

目次

DPC++ で DFT を設定して計算#

以下に、デフォルトまたは推奨されるストライドを使用して、デバイスがアクセス可能な USM 割り当てを使用して (バッチ処理される場合もある) DFT を計算する使用例をいくつか示します。さらに実用的な例は、oneMKL インストール・ディレクトリーの share/doc/mkl/examples/sycl/dft にもあります。

DPC++ インターフェイスを使用するには、oneapi/mkl/dft.hpp をインクルードします。DPC++ インターフェイスは、前のページに示したいくつかの制限付きで CPU およびインテル® GPU デバイスをサポートします。その他の (リリース固有の) 既知の制限については、インテル® oneAPI マス・カーネル・ライブラリーのリリースノート (英語) を参照してください。

DFT 関数は、複素数データの直交座標表現を前提としています。複素数は実数部と虚数部によって定義されます。oneMKL は、極座標表現との間で変換する効率的なベクトル数学関数を提供します。

使用例#

次の例は、oneMKL の DPC++ インターフェイスを介して、他のイベントに依存せずにデバイスにアクセス可能な USM 割り当てを使用して、(場合によっては多数の) 複素数および実数の離散フーリエ変換を計算する方法を示しています。

#include <oneapi/mkl/dft.hpp> 
#include <complex> 
enum class dft_compute_direction { 
   FWD_DFT, 
   BWD_DFT 
}; 
namespace dft_ns = oneapi::mkl::dft; 
template<typename T> 
bool is_usable_compute_type = std::is_same_v<T, float> | 
                              std::is_same_v<T, std::complex<float>> | 
                              std::is_same_v<T, double> | 
                              std::is_same_v<T, std::complex<double>>; 

template<dft_ns::precision prec, dft_ns::domain dom> 
void ready_descriptor(dft_ns::descriptor<prec, dom>& desc, 
                      sycl::queue& q, 
                      dft_ns::config_value placement, 
                      std::int64_t batch_size) { 
   // this example assumes that desc was freshly created and that its default 
   // configuration values have not been changed prior to invoking this routine 

   // Fetch rank, lengths and type of forward domain 
   std::int64_t rank; 
   desc.get_value(dft_ns::config_param::DIMENSION, &rank); 
   std::vector<std::int64_t> lengths(rank); 
   desc.get_value(dft_ns::config_param::LENGTHS, &lengths); 
   if (placement == dft_ns::config_value::NOT_INPLACE) { 
      // non-default behavior 
      desc.set_value(dft_ns::config_param::PLACEMENT, placement); 
      if constexpr (dom == dft_ns::domain::REAL) { 
         // for real descriptors interpreting backward domain's data type as 
         // complex (default behavior), unpadded forward-domain data layouts 
         // may be used if operating out-of-place: 
         std::vector<std::int64_t> fwd_strides(rank + 1); 
         std::vector<std::int64_t> bwd_strides(rank + 1); 
         fwd_strides[rank] = bwd_strides[rank] = 1; 
         if (rank > 1) { 
            fwd_strides[rank - 1] = lengths[rank - 1]; 
            bwd_strides[rank - 1] = lengths[rank - 1] / 2 + 1; 
            for (std::int64_t dim = 2; dim < rank; dim++) { 
               fwd_strides[rank - dim] = fwd_strides[rank - dim + 1] * lengths[rank - dim]; 
               bwd_strides[rank - dim] = bwd_strides[rank - dim + 1] * lengths[rank - dim]; 
            } } 
            fwd_strides[0] = bwd_strides[0] = 0; // offsets in data layouts 
            desc.set_value(dft_ns::config_param::FWD_STRIDES, fwd_strides); 
            desc.set_value(dft_ns::config_param::BWD_STRIDES, bwd_strides); 
      } 
   } 
   if (batch_size > 1) { 
      // configuration of batched DFTs 
      desc.set_value(dft_ns::config_param::NUMBER_OF_TRANSFORMS, batch_size); 
      std::int64_t fwd_distance, bwd_distance; 
      fwd_distance = bwd_distance = 1; 
      for (std::int64_t dim = 0; dim < rank - 1; dim++) { 
         fwd_distance *= lengths[dim]; 
         bwd_distance *= lengths[dim]; 
      } 
      if constexpr (dom == dft_ns::domain::REAL) { 
         if (placement == dft_ns::config_value::NOT_INPLACE) 
            fwd_distance *= lengths[rank - 1]; 
         else 
            fwd_distance *= 2 * (lengths[rank - 1] / 2 + 1); 
         bwd_distance *= (lengths[rank - 1] / 2 + 1); 
      } 
      else { 
         fwd_distance *= lengths[rank - 1]; 
         bwd_distance *= lengths[rank - 1]; 
      } 
      desc.set_value(dft_ns::config_param::FWD_DISTANCE, fwd_distance); 
      desc.set_value(dft_ns::config_param::BWD_DISTANCE, bwd_distance); 
   } 
   desc.commit(q); 
} 

template <typename T, std::enable_if_t<is_usable_compute_type<T>, bool> = true> 
sycl::event compute_inplace_cmplx_dft(sycl::queue& q, 
                                      const std::vector<std::int64_t>& lengths, 
                                      const std::int64_t& batch_size, 
                                      const dft_compute_direction& direction, 
                                      T* device_accessible_usm_data) { 
   constexpr auto prec = 
      std::is_same_v<T, float> | std::is_same_v<T, std::complex<float>> ? 
         dft_ns::precision::SINGLE : dft_ns::precision::DOUBLE; 
   auto desc = dft_ns::descriptor<prec, dft_ns::domain::COMPLEX>(lengths); 
   ready_descriptor(desc, q, dft_ns::config_value::INPLACE, batch_size); 
   // Initialize data such that input data entry of multi-index 
   // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
   // is Z[ m*lengths[0]*lengths[1]*lengths[2] 
   // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
   // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
   // is Z[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
   // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
   // is Z[ m*lengths[0] + k1 ] 
   // wherein 
   // using real_t = 
   // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
   // std::complex<real_t>* Z = 
   // static_cast<std::complex<real_t>*>(device_accessible_usm_data); 
   // (0 <= kj < lengths[j-1]) 
   if (direction == dft_compute_direction::FWD_DFT) 
      return dft_ns::compute_forward(desc, device_accessible_usm_data); 
   else 
      return dft_ns::compute_backward(desc, device_accessible_usm_data); 
   // Upon completion of any of the above, the output data entry of multi-index 
   // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
   // is Z[ m*lengths[0]*lengths[1]*lengths[2] 
   // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
   // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
   // is Z[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
   // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
   // is Z[ m*lengths[0] + k1 ] 
   // (0 <= kj < lengths[j-1]) 
} 

template <typename T, std::enable_if_t<is_usable_compute_type<T>, bool> = true> 
sycl::event compute_outofplace_cmplx_dft(sycl::queue& q, 
                                         const std::vector<std::int64_t>& lengths, 
                                         const std::int64_t& batch_size, 
                                         const dft_compute_direction& direction, 
                                         T* device_accessible_usm_data_in, 
                                         T* device_accessible_usm_data_out) { 
   constexpr bool is_single_precision = 
      std::is_same_v<T, float> | std::is_same_v<T, std::complex<float>>; 
   constexpr auto prec = (is_single_precision) ? 
      dft_ns::precision::SINGLE : dft_ns::precision::DOUBLE; 
   auto desc = dft_ns::descriptor<prec, dft_ns::domain::COMPLEX>(lengths); 
   ready_descriptor(desc, q, dft_ns::config_value::NOT_INPLACE, batch_size); 
   // Initialize data such that input data entry of multi-index 
   // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
   // is Z[ m*lengths[0]*lengths[1]*lengths[2] 
   // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
   // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
   // is Z[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
   // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
   // is Z[ m*lengths[0] + k1 ] 
   // wherein 
   // using real_t = 
   // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
   // std::complex<real_t>* Z = 
   // static_cast<std::complex<real_t>*>(device_accessible_usm_data_in); 
   // (0 <= kj < lengths[j-1]) 
   if (direction == dft_compute_direction::FWD_DFT) 
      return dft_ns::compute_forward(desc, device_accessible_usm_data_in, 
                                           device_accessible_usm_data_out); 
   else 
      return dft_ns::compute_backward(desc, device_accessible_usm_data_in, 
                                            device_accessible_usm_data_out); 
   // Upon completion of any of the above, the output data entry of multi-index 
   // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
   // = Y[ m*lengths[0]*lengths[1]*lengths[2] 
   // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
   // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
   // = Y[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
   // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
   // = Y[ m*lengths[0] + k1 ] 
   // wherein 
   // using real_t = 
   // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
   // std::complex<real_t>* Y = 
   // static_cast<std::complex<real_t>*>(device_accessible_usm_data_out); 
   // (0 <= kj < lengths[j-1]) 
} 

template<typename T, std::enable_if_t<is_usable_compute_type<T>, bool> = true> 
sycl::event compute_inplace_real_dft(sycl::queue& q, 
                                     const std::vector<std::int64_t>& lengths, 
                                     const std::int64_t& batch_size, 
                                     const dft_compute_direction& direction, 
                                     T* device_accessible_usm_data) { 
   constexpr bool is_single_precision = 
      std::is_same_v<T, float> | std::is_same_v<T, std::complex<float>>; 
   constexpr auto prec = (is_single_precision) ? 
      dft_ns::precision::SINGLE : dft_ns::precision::DOUBLE; 
   auto desc = dft_ns::descriptor<prec, dft_ns::domain::REAL>(lengths); 
   ready_descriptor(desc, q, dft_ns::config_value::INPLACE, batch_size); 
   // using real_t = 
   // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
   // Let 
   // real_t* fwd_Z = static_cast<real_t*>(device_accessible_usm_data); 
   // and // std::complex<real_t>* bwd_Z = 
   // static_cast<std::complex<real_t>*>(device_accessible_usm_data); 
   if (direction == dft_compute_direction::FWD_DFT) { 
      // Initialize data such that input data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is fwd_Z[ m*lengths[0]*lengths[1]*2*(lengths[2]/2 + 1) 
      // + k1*lengths[1]*2*(lengths[2]/2 + 1) 
      // + k2*2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is fwd_Z[ m*lengths[0]*2*(lengths[1]/2 + 1) 
      // + k1*2*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is fwd_Z[ m*2*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1]) 
      return dft_ns::compute_forward(desc, device_accessible_usm_data); 
      // Upon completion of the above, the output data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is bwd_Z[ m*lengths[0]*lengths[1]*(lengths[2]/2 + 1) // + k1*lengths[1]*(lengths[2]/2 + 1) 
      // + k2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is bwd_Z[ m*lengths[0]*(lengths[1]/2 + 1) 
      // + k1*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is bwd_Z[ m*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1] for j < lengths.size() - 1 and 
      // 0 <= kj < lengths[j-1]/2 + 1 for j == lengths.size() - 1) 
   } 
   else { 
      // Initialize data such that input data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is bwd_Z[ m*lengths[0]*lengths[1]*(lengths[2]/2 + 1) 
      // + k1*lengths[1]*(lengths[2]/2 + 1) 
      // + k2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is bwd_Z[ m*lengths[0]*(lengths[1]/2 + 1) 
      // + k1*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is bwd_Z[ m*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1] for j < lengths.size() - 1 and 
      // 0 <= kj < lengths[j-1]/2 + 1 for j == lengths.size() - 1) 
      return dft_ns::compute_backward(desc, device_accessible_usm_data); 
      // Upon completion of the above, the output data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is fwd_Z[ m*lengths[0]*lengths[1]*2*(lengths[2]/2 + 1) 
      // + k1*lengths[1]*2*(lengths[2]/2 + 1) 
      // + k2*2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is fwd_Z[ m*lengths[0]*2*(lengths[1]/2 + 1) 
      // + k1*2*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is fwd_Z[ m*2*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1]) 
   } 
} 

template<typename T, std::enable_if_t<is_usable_compute_type<T>, bool> = true> 
sycl::event compute_outofplace_real_dft(sycl::queue& q, 
                                        const std::vector<std::int64_t>& lengths, 
                                        const std::int64_t& batch_size, 
                                        const dft_compute_direction& direction, 
                                        T* device_accessible_usm_data_in, 
                                        T* device_accessible_usm_data_out) { 
   constexpr bool is_single_precision = 
      std::is_same_v<T, float> | std::is_same_v<T, std::complex<float>>; 
   constexpr auto prec = (is_single_precision) ? 
      dft_ns::precision::SINGLE : dft_ns::precision::DOUBLE; 
   auto desc = dft_ns::descriptor<prec, dft_ns::domain::REAL>(lengths); 
   ready_descriptor(desc, q, dft_ns::config_value::NOT_INPLACE, batch_size); 
   if (direction == dft_compute_direction::FWD_DFT) { 
      // Initialize data such that input data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is fwd_Z[ m*lengths[0]*lengths[1]*lengths[2] 
      // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is fwd_Z[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is fwd_Z[ m*lengths[0] + k1 ] 
      // wherein 
      // using real_t = 
      // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
      // real_t* fwd_Z = static_cast<real_t*>(device_accessible_usm_data_in); 
      // (0 <= kj < lengths[j-1]) 
      return dft_ns::compute_forward(desc, device_accessible_usm_data_in, 
                                           device_accessible_usm_data_out); 
      // Upon completion of the above, the output data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is bwd_Z[ m*lengths[0]*lengths[1]*(lengths[2]/2 + 1) 
      // + k1*lengths[1]*(lengths[2]/2 + 1) 
      // + k2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) // is bwd_Z[ m*lengths[0]*(lengths[1]/2 + 1) 
      // + k1*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is bwd_Z[ m*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1] for j < lengths.size() - 1 and 
      // 0 <= kj < lengths[j-1]/2 + 1 for j == lengths.size() - 1) 
      // wherein 
      // using real_t = 
      // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
      // std::complex<real_t>* bwd_Z = 
      // static_cast<std::complex<real_t>*>(device_accessible_usm_data_out); 
   } 
   else { 
      // Initialize data such that input data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is bwd_Z[ m*lengths[0]*lengths[1]*(lengths[2]/2 + 1) // + k1*lengths[1]*(lengths[2]/2 + 1) 
      // + k2*(lengths[2]/2 + 1) + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is bwd_Z[ m*lengths[0]*(lengths[1]/2 + 1) 
      // + k1*(lengths[1]/2 + 1) + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is bwd_Z[ m*(lengths[0]/2 + 1) + k1 ] 
      // (0 <= kj < lengths[j-1] for j < lengths.size() - 1 and 
      // 0 <= kj < lengths[j-1]/2 + 1 for j == lengths.size() - 1) 
      // wherein 
      // using real_t = 
      // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
      // std::complex<real_t>* bwd_Z = 
      // static_cast<std::complex<real_t>*>(device_accessible_usm_data_in); 
      return dft_ns::compute_backward(desc, device_accessible_usm_data_in, 
                                            device_accessible_usm_data_out); 
      // Upon completion of the above, the output data entry of multi-index 
      // - {k1,k2,k3; m} (for 3D DFTs, i.e., lengths.size() == 3) 
      // is fwd_Z[ m*lengths[0]*lengths[1]*lengths[2] 
      // + k1*lengths[1]*lengths[2] + k2*lengths[2] + k3 ] 
      // - {k1,k2; m} (for 2D DFTs, i.e., lengths.size() == 2) 
      // is fwd_Z[ m*lengths[0]*lengths[1] + k1*lengths[1] + k2 ] 
      // - {k1; m} (for 1D DFTs, i.e., lengths.size() == 1) 
      // is fwd_Z[ m*lengths[0] + k1 ] 
      // (0 <= kj < lengths[j-1]) 
      // wherein 
      // using real_t = 
      // std::conditional_t<prec == dft_ns::precision::SINGLE, float, double>; 
      // real_t* fwd_Z = static_cast<real_t*>(device_accessible_usm_data_out); 
   } 
}