oneMKL RNG デバイス利用モデル#
デバイスルーチンの一般的な使用モデルは、oneMKL RNG 利用モデルで説明されているものと同じです:
基本的な乱数生成器のオブジェクトを作成して初期化します。
配布生成器のオブジェクトを作成して初期化します。
適切な統計分布を持つ乱数を取得するには、生成ルーチンを呼び出します。
スカラー乱数生成の例#
1 #include <iostream>
2 #include <sycl/sycl.hpp>
3 #include "oneapi/mkl/rng/device.hpp"
4
5
6 int main() {
7 sycl::queue queue;
8 const int n = 1000;
9 const int seed = 1;
10 // Prepare an array for random numbers
11 std::vector<float> r(n);
12
13
14 sycl::buffer<float, 1> r_buf(r.data(), r.size());
15 // Submit a kernel to generate on device
16 queue.submit([&](sycl::handler& cgh) {
17 auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
18 cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> item) {
19 // Create an engine object
20 oneapi::mkl::rng::device::philox4x32x10<> engine(seed, item.get_id(0));
21 // Create a distribution object
22 oneapi::mkl::rng::device::uniform<> distr;
23 // Call generate function to obtain scalar random number
24 float res = oneapi::mkl::rng::device::generate(distr, engine);
25
26
27 r_acc[item.get_id(0)] = res;
28 });
29 });
30
31
32 auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
33 std::cout << "Samples of uniform distribution" << std::endl;
34 for(int i = 0; i < 10; i++) {
35 std::cout << r_acc[i] << std::endl;
36 }
37
38
39 return 0;
40 }ベクトル乱数生成の例#
1 #include <iostream>
2 #include <sycl/sycl.hpp> 3
4
5 #include "oneapi/mkl/rng/device.hpp"
6
7
8 int main() {
9 sycl::queue queue;
10 const int n = 1000;
11 const int seed = 1;
12 const int vec_size = 4;
13 // Prepare an array for random numbers
14 std::vector<float> r(n);
15
16
17 sycl::buffer<float, 1> r_buf(r.data(), r.size());
18 // Submit a kernel to generate on device
19 sycl::queue{}.submit([&](sycl::handler& cgh) {
20 auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
21 cgh.parallel_for(sycl::range<1>(n / vec_size), [=](sycl::item<1> item) {
22 // Create an engine object
23 oneapi::mkl::rng::device::philox4x32x10<vec_size> engine(seed, item.get_id(0) * vec_size);
24 // Create a distribution object
25 oneapi::mkl::rng::device::uniform<> distr;
26 // Call generate function to obtain sycl::vec<float, 4> with random numbers
27 auto res = oneapi::mkl::rng::device::generate(distr, engine);
28
29
30 res.store(ite.get_id(0), r_acc);
31 });
32 });
33
34
35 auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
36 std::cout << "Samples of uniform distribution" << std::endl;
37 for(int i = 0; i < 10; i++) {
38 std::cout << r_acc[i] << std::endl;
39 }
40
41
42 return 0;
43 }sycl::buffer / USM ポインターを介して手動で、またはエンジン記述子と呼ばれる特定のホスト側ヘルパークラスを使用して、カーネル間でエンジンを保存することがあります。
sycl::buffer に格納されたエンジンによる乱数生成の例#
エンジンは最初のカーネルで初期化されます。乱数生成は 2 番目のカーネルで実行されます。
1 #include <iostream>
2 #include <sycl/sycl.hpp>
3
4
5 #include "oneapi/mkl/rng/device.hpp"
6
7
8 int main() {
9 sycl::queue queue;
10 const int n = 1000;
11 const int seed = 1;
12 const int vec_size = 4;
13 // Prepare an array for random numbers
14 std::vector<float> r(n);
15 sycl::buffer<float, 1> r_buf(r.data(), r.size());
16 sycl::range<1> range(n / vec_size);
17 sycl::buffer<oneapi::mkl::rng::device::mrg32k3a<vec_size>, 1> engine_buf(range);
18
19
20 sycl::queue queue;
21
22
23 // Kernel with initialization of engines
24 queue.submit([&](sycl::handler& cgh) {
25 // Create an accessor to sycl::buffer with engines to write initialized states
26 auto engine_acc = engine_buf.template get_access<sycl::access::mode::write>(cgh);
27 cgh.parallel_for(range, [=](sycl::item<1> item) {
28 size_t id = item.get_id(0);
29 // Create an engine object with offset id * 2^64
30 oneapi::mkl::rng::device::mrg32k3a<vec_size> engine(seed, {0, id});
31 engine_acc[id] = engine;
32 });
33 });
34 // Kernel for random numbers generation
35 queue.submit([&](sycl::handler& cgh) {
36 auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
37 // Create an accessor to sycl::buffer with engines to read initialized states
38 auto engine_acc = engine_buf.template get_access<sycl::access::mode::read>(cgh);
39 cgh.parallel_for(range, [=](sycl::item<1> item) {
40 size_t id = item.get_id(0);
41
42
43 auto engine = engine_acc[id];
44 oneapi::mkl::rng::device::uniform distr;
45 auto res = oneapi::mkl::rng::device::generate(distr, engine);
46
47
48 res.store(id, r_acc);
49 });
50 });
51
52
53 auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
54 std::cout << "Samples of uniform distribution" << std::endl;
55 for(int i = 0; i < 10; i++) {
56 std::cout << r_acc[i] << std::endl;
57 }
58
59
60 return 0;
61 }ホスト側ヘルパーを使用した乱数生成の例#
1 #include <iostream>
2 #include <sycl/sycl.hpp>
3
4
5 #include "oneapi/mkl/rng/device.hpp"
6
7
8 int main() {
9 sycl::queue queue;
10 const int n = 1000;
11 const int seed = 1;
12 const int vec_size = 4;
13 // prepare array for random numbers
14 std::vector<float> r(n);
15 sycl::buffer<float, 1> r_buf(r.data(), r.size());
16 sycl::range<1> range(n / vec_size);
17
18
19 // offset of each engine in engine_descriptor
20 int offset = vec_size;
21 // each engine would be created in enqueued task as of specified range
22 // as oneapi::mkl::rng::device::mrg32k3a<vec_size>(seed, id * offset);
23 oneapi::mkl::rng::device::engine_descriptor<oneapi::mkl::rng::device::mrg32k3a<vec_size>>
24 descr(queue, range, seed, offset);
25 queue.submit([&](sycl::handler& cgh) {
26 auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
27 // create engine_accessor
28 auto engine_acc = descr.get_access(cgh);
29 cgh.parallel_for(range, [=](sycl::item<1> item) {
30 size_t id = item.get_id(0);
31 // load engine from engine_accessor
32 auto engine = engine_acc.load(id);
33 oneapi::mkl::rng::device::uniform<Type> distr;
34
35
36 auto res = oneapi::mkl::rng::device::generate(distr, engine);
37
38
39 res.store(id, r_acc);
40 // store engine for furter calculations if needed
41 engine_acc.store(engine, id);
42 });
43 });
44
45
46 auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
47 std::cout << "Samples of uniform distribution" << std::endl;
48 for(int i = 0; i < 10; i++) {
49 std::cout << r_acc[i] << std::endl;
50 }
51
52
53 return 0;
54 }ガンマ分布を用いた乱数生成の例#
1 #include <iostream>
2 #include <vector>
3 #include <sycl/sycl.hpp>
4 #include "oneapi/mkl/rng/device.hpp"
5
6 constexpr size_t seed = 42;
7
8 int main() {
9 sycl::queue queue;
10 const size_t n = 10'000;
11
12 // create USM allocator
13 sycl::usm_allocator<double, sycl::usm::alloc::shared> allocator(queue);
14
15 // create vector with USM allocator
16 std::vector<double, decltype(allocator)> r_vec(n, allocator);
17 double* r = r_vec.data();
18
19 size_t reduced_val = 0;
20 // create a reductor to calculate the sum of rejected numbers across all work-items
21 auto reductor = sycl::reduction(&reduced_val, size_t(0), std::plus<size_t>{});
22
23 queue.parallel_for(n, reductor, [=](size_t id, auto& sum) {
24 // create basic random number generator object
25 // 2^76 numbers for each work-item are skipped to allow acceptance rejection scheme ro reject as much numbers as it needs
26 oneapi::mkl::rng::device::mrg32k3a<1> engine({ seed, seed, seed, seed, seed, seed }, { 0, (4096 * id) });
27 // create distribution object
28 oneapi::mkl::rng::device::gamma<double> distr(0.7, 0.0, 1.0);
29 // perform generation
30 auto res = oneapi::mkl::rng::device::generate(distr, engine);
31 r[id] = res;
32
33 // calculate how much random numbers are rejected in this work-item
34 size_t rej = distr.count_rejected_numbers();
35 sum += rej;
36 }).wait(); // synchronization can be also done by queue.wait()
37
38 std::cout << "First 10 gamma distributed numbers are following:"<< std::endl;
39 for(int i = 0; i < 10; i++) {
40 std::cout << r[i] << " ";
41 }
42 std::cout << std::endl;
43
44 std::cout << "rejected " << reduced_val <<" random numbers" << std::endl;
45
46 return 0;
47 }さらに、乱数生成器のデバイスルーチンの使用方法を示す例が以下で提供されています。
${MKL}/examples/dpcpp_device/rng/source