oneMKL RNG デバイス利用モデル#

デバイスルーチンの一般的な使用モデルは、oneMKL RNG 利用モデルで説明されているものと同じです:

  1. 基本的な乱数生成器のオブジェクトを作成して初期化します。

  2. 配布生成器のオブジェクトを作成して初期化します。

  3. 適切な統計分布を持つ乱数を取得するには、生成ルーチンを呼び出します。

スカラー乱数生成の例#

1  #include <iostream> 
2  #include <sycl/sycl.hpp> 
3  #include "oneapi/mkl/rng/device.hpp" 
4 
5 
6  int main() { 
7     sycl::queue queue; 
8     const int n = 1000; 
9     const int seed = 1; 
10    // Prepare an array for random numbers 
11    std::vector<float> r(n); 
12 
13 
14    sycl::buffer<float, 1> r_buf(r.data(), r.size()); 
15    // Submit a kernel to generate on device 
16    queue.submit([&](sycl::handler& cgh) { 
17         auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); 
18         cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> item) { 
19             // Create an engine object 
20             oneapi::mkl::rng::device::philox4x32x10<> engine(seed, item.get_id(0)); 
21             // Create a distribution object 
22             oneapi::mkl::rng::device::uniform<> distr; 
23             // Call generate function to obtain scalar random number 
24             float res = oneapi::mkl::rng::device::generate(distr, engine); 
25 
26 
27             r_acc[item.get_id(0)] = res; 
28         }); 
29     }); 
30 
31 
32     auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); 
33     std::cout << "Samples of uniform distribution" << std::endl; 
34     for(int i = 0; i < 10; i++) { 
35         std::cout << r_acc[i] << std::endl; 
36     } 
37 
38 
39     return 0; 
40 }

ベクトル乱数生成の例#

1  #include <iostream> 
2  #include <sycl/sycl.hpp> 3 
4 
5  #include "oneapi/mkl/rng/device.hpp" 
6 
7 
8  int main() { 
9     sycl::queue queue; 
10    const int n = 1000; 
11    const int seed = 1; 
12    const int vec_size = 4; 
13    // Prepare an array for random numbers 
14    std::vector<float> r(n); 
15 
16 
17    sycl::buffer<float, 1> r_buf(r.data(), r.size()); 
18    // Submit a kernel to generate on device 
19    sycl::queue{}.submit([&](sycl::handler& cgh) { 
20        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); 
21        cgh.parallel_for(sycl::range<1>(n / vec_size), [=](sycl::item<1> item) { 
22            // Create an engine object 
23            oneapi::mkl::rng::device::philox4x32x10<vec_size> engine(seed, item.get_id(0) * vec_size); 
24            // Create a distribution object 
25            oneapi::mkl::rng::device::uniform<> distr; 
26            // Call generate function to obtain sycl::vec<float, 4> with random numbers 
27            auto res = oneapi::mkl::rng::device::generate(distr, engine); 
28 
29 
30            res.store(ite.get_id(0), r_acc); 
31        }); 
32    }); 
33 
34 
35     auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); 
36     std::cout << "Samples of uniform distribution" << std::endl; 
37     for(int i = 0; i < 10; i++) { 
38        std::cout << r_acc[i] << std::endl; 
39    } 
40 
41 
42    return 0; 
43 }

sycl::buffer / USM ポインターを介して手動で、またはエンジン記述子と呼ばれる特定のホスト側ヘルパークラスを使用して、カーネル間でエンジンを保存することがあります。

sycl::buffer に格納されたエンジンによる乱数生成の例#

エンジンは最初のカーネルで初期化されます。乱数生成は 2 番目のカーネルで実行されます。

1  #include <iostream> 
2  #include <sycl/sycl.hpp> 
3 
4 
5  #include "oneapi/mkl/rng/device.hpp" 
6 
7 
8  int main() { 
9     sycl::queue queue; 
10    const int n = 1000; 
11    const int seed = 1; 
12    const int vec_size = 4; 
13    // Prepare an array for random numbers 
14    std::vector<float> r(n); 
15    sycl::buffer<float, 1> r_buf(r.data(), r.size()); 
16    sycl::range<1> range(n / vec_size); 
17    sycl::buffer<oneapi::mkl::rng::device::mrg32k3a<vec_size>, 1> engine_buf(range); 
18 
19 
20    sycl::queue queue; 
21 
22 
23    // Kernel with initialization of engines 
24    queue.submit([&](sycl::handler& cgh) { 
25        // Create an accessor to sycl::buffer with engines to write initialized states 
26        auto engine_acc = engine_buf.template get_access<sycl::access::mode::write>(cgh); 
27        cgh.parallel_for(range, [=](sycl::item<1> item) { 
28            size_t id = item.get_id(0); 
29            // Create an engine object with offset id * 2^64 
30            oneapi::mkl::rng::device::mrg32k3a<vec_size> engine(seed, {0, id}); 
31            engine_acc[id] = engine; 
32        }); 
33    }); 
34    // Kernel for random numbers generation 
35    queue.submit([&](sycl::handler& cgh) { 
36        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); 
37        // Create an accessor to sycl::buffer with engines to read initialized states 
38        auto engine_acc = engine_buf.template get_access<sycl::access::mode::read>(cgh); 
39        cgh.parallel_for(range, [=](sycl::item<1> item) { 
40            size_t id = item.get_id(0); 
41 
42 
43            auto engine = engine_acc[id]; 
44            oneapi::mkl::rng::device::uniform distr; 
45            auto res = oneapi::mkl::rng::device::generate(distr, engine); 
46 
47 
48            res.store(id, r_acc); 
49        }); 
50    }); 
51 
52 
53    auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); 
54    std::cout << "Samples of uniform distribution" << std::endl; 
55    for(int i = 0; i < 10; i++) { 
56        std::cout << r_acc[i] << std::endl; 
57    } 
58 
59 
60    return 0; 
61 }

ホスト側ヘルパーを使用した乱数生成の例#

1  #include <iostream> 
2  #include <sycl/sycl.hpp> 
3 
4 
5  #include "oneapi/mkl/rng/device.hpp" 
6 
7 
8  int main() { 
9     sycl::queue queue; 
10    const int n = 1000; 
11    const int seed = 1; 
12    const int vec_size = 4; 
13    // prepare array for random numbers 
14    std::vector<float> r(n); 
15    sycl::buffer<float, 1> r_buf(r.data(), r.size()); 
16    sycl::range<1> range(n / vec_size); 
17 
18 
19    // offset of each engine in engine_descriptor 
20    int offset = vec_size; 
21    // each engine would be created in enqueued task as of specified range 
22    // as oneapi::mkl::rng::device::mrg32k3a<vec_size>(seed, id * offset); 
23    oneapi::mkl::rng::device::engine_descriptor<oneapi::mkl::rng::device::mrg32k3a<vec_size>>
24    descr(queue, range, seed, offset); 
25    queue.submit([&](sycl::handler& cgh) { 
26        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); 
27        // create engine_accessor 
28        auto engine_acc = descr.get_access(cgh); 
29        cgh.parallel_for(range, [=](sycl::item<1> item) { 
30            size_t id = item.get_id(0); 
31            // load engine from engine_accessor 
32            auto engine = engine_acc.load(id); 
33            oneapi::mkl::rng::device::uniform<Type> distr; 
34 
35 
36            auto res = oneapi::mkl::rng::device::generate(distr, engine); 
37 
38 
39            res.store(id, r_acc); 
40            // store engine for furter calculations if needed 
41            engine_acc.store(engine, id); 
42        }); 
43    }); 
44 
45 
46    auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); 
47    std::cout << "Samples of uniform distribution" << std::endl; 
48    for(int i = 0; i < 10; i++) { 
49        std::cout << r_acc[i] << std::endl; 
50    } 
51 
52 
53    return 0; 
54 }

ガンマ分布を用いた乱数生成の例#

1  #include <iostream> 
2  #include <vector> 
3  #include <sycl/sycl.hpp> 
4  #include "oneapi/mkl/rng/device.hpp" 
5 
6  constexpr size_t seed = 42; 
7 
8  int main() { 
9   sycl::queue queue; 
10   const size_t n = 10'000; 
11 
12   // create USM allocator 
13   sycl::usm_allocator<double, sycl::usm::alloc::shared> allocator(queue); 
14 
15   // create vector with USM allocator 
16   std::vector<double, decltype(allocator)> r_vec(n, allocator); 
17   double* r = r_vec.data(); 
18 
19   size_t reduced_val = 0; 
20   // create a reductor to calculate the sum of rejected numbers across all work-items 
21   auto reductor = sycl::reduction(&reduced_val, size_t(0), std::plus<size_t>{}); 
22 
23   queue.parallel_for(n, reductor, [=](size_t id, auto& sum) { 
24       // create basic random number generator object 
25       // 2^76 numbers for each work-item are skipped to allow acceptance rejection scheme ro reject as much numbers as it needs 
26       oneapi::mkl::rng::device::mrg32k3a<1> engine({ seed, seed, seed, seed, seed, seed }, { 0, (4096 * id) }); 
27       // create distribution object 
28       oneapi::mkl::rng::device::gamma<double> distr(0.7, 0.0, 1.0); 
29       // perform generation 
30       auto res = oneapi::mkl::rng::device::generate(distr, engine); 
31       r[id] = res; 
32 
33       // calculate how much random numbers are rejected in this work-item 
34       size_t rej = distr.count_rejected_numbers(); 
35       sum += rej; 
36   }).wait(); // synchronization can be also done by queue.wait() 
37 
38   std::cout << "First 10 gamma distributed numbers are following:"<< std::endl; 
39   for(int i = 0; i < 10; i++) { 
40       std::cout << r[i] << " "; 
41   } 
42   std::cout << std::endl; 
43 
44   std::cout << "rejected " << reduced_val <<" random numbers" << std::endl; 
45 
46   return 0; 
47 }

さらに、乱数生成器のデバイスルーチンの使用方法を示す例が以下で提供されています。

${MKL}/examples/dpcpp_device/rng/source