PrevUpHome

Home:: tiv.cc

SYCL Getting Started


SYCL Getting Started

SYCL

Setting Up ComputeCpp on Arch Linux

Install computecpp

computecpp_info

First Program

#include <SYCL/sycl.hpp>
#include <iostream>

int main()
{
	sycl::float4 a{1,2,3,4};
	sycl::float4 b{5,6,7,8};
	sycl::float4 c{4,3,2,1};

	sycl::default_selector selector;

	sycl::queue queue{selector};
	std::cout << queue.get_device().get_info<sycl::info::device::name>() << std::endl;

	{ // Scope Starts
		sycl::buffer<sycl::float4, 1> x{&a, sycl::range<1>{1}};
		sycl::buffer<sycl::float4, 1> y{&b, sycl::range<1>{1}};
		sycl::buffer<sycl::float4, 1> z{&c, sycl::range<1>{1}};

		// Execute Kernel
		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto a_acc = x.get_access<sycl::access::mode::read>(handler);
				auto b_acc = y.get_access<sycl::access::mode::read>(handler);
				auto c_acc = z.get_access<sycl::access::mode::discard_write>(handler);
	
				handler.single_task<class some_name>(
					[=] ()
					{
						c_acc[0] = a_acc[0] + b_acc[0];
					}
				);
			}
		);
	} // Scope Ends

	auto print = [] (auto & v)
	{
		std::cout << "=> " << v.x() << ' ' << v.y() << ' ' << v.z() << ' ' << v.w() << '\n';
	};
	print(a);
	print(b);
	print(c);
}

How to compile

compute++ hello.cpp -std=c++17 -sycl-driver -I./path/to/sycl/include -L./path/to/sycl/lib -lComputeCpp -o hello

Link:

https://developer.codeplay.com/products/computecpp/ce/2.11.0/guides/sycl-guide/hello-sycl

SYCL Parallel For

SYCL parallel for loop.

#include <SYCL/sycl.hpp>
#include <vector>
#include <iostream>

int main()
{
	std::vector<float> vector(10, 1.0f);

	auto queue = sycl::queue{sycl::default_selector{}};

	{ // scope begins
		auto buffer = sycl::buffer<float, 1>{vector.data(), sycl::range<1>{vector.size()}};

		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto access = buffer.get_access<sycl::access::mode::read_write>(handler);
				handler.parallel_for<class name>(
					sycl::range<1>{vector.size()},
					[=] (sycl::id<1> i)
					{
						access[i] = i[0] * i[0] * 0.5f;
					}
				);
			}
		);
	} // scope ends

	for (auto & value: vector) std::cout << value << ' ';
	// 0 0.5 2 4.5 8 12.5 18 24.5 32 40.5

	std::cout << std::endl;
}

Access Single Value

#include <SYCL/sycl.hpp>
#include <iostream>

int main()
{
	float value{};

	auto queue = sycl::queue{sycl::default_selector{}};

	{
		auto buffer = sycl::buffer<float, 1>{&value, sycl::range<1>{1}};

		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto access =buffer.get_access<sycl::access::mode::read_write>(handler);
				handler.single_task<class name>(
					[=]
					{
						access[0] = 9.8f;
					}
				);
			}
		);
	}

	std::cout << value << std::endl; // 9.8
}

SYCL Selector Types

The default selector will prefer a GPU.

Parallel For Vector Addition

#include <SYCL/sycl.hpp>
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/phoenix.hpp>

using boost::phoenix::arg_names::arg1;

// Element-wise addition of two host vectors on a GPU device.
// Each work-item of the parallel_for handles one output element; the result
// is printed from the host vector after the buffers have gone out of scope.
int main()
{
	std::vector<float> v1{1,2,3,4,5};
	std::vector<float> v2{2.1, 3.1, 4.1, 5.1, 6.1};
	std::vector<float> output(v1.size());

	auto queue = sycl::queue{sycl::gpu_selector{}};

	{
		auto v1_buffer = sycl::buffer<float, 1>{v1.data(), sycl::range<1>{v1.size()}};
		auto v2_buffer = sycl::buffer<float, 1>{v2.data(), sycl::range<1>{v2.size()}};
		auto output_buffer = sycl::buffer<float, 1>{output.data(), sycl::range<1>{output.size()}};

		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto v1_access = v1_buffer.get_access<sycl::access::mode::read>(handler);
				auto v2_access = v2_buffer.get_access<sycl::access::mode::read>(handler);
				auto output_access = output_buffer.get_access<sycl::access::mode::write>(handler);
				handler.parallel_for<class name>(
					sycl::range<1>{output.size()},
					[=] (sycl::id<1> id)
					{
						// Index with the id itself: copying id[0] into an int
						// would narrow the size_t work-item index.
						output_access[id] = v1_access[id] + v2_access[id];
					}
				);
			}
		);
	}

	std::for_each(output.begin(), output.end(), std::cout << arg1 << ' ');
	// 3.1 5.1 7.1 9.1 11.1
	std::cout << std::endl;
}

Template Function Objects

#include <SYCL/sycl.hpp>
#include <iostream>
#include <vector>

// Integer type used for element counts and loop indices.
using size_type = unsigned long int;

// Function object that squares the first `size` elements reachable through
// `access`, in place.
//
// access_type only needs to support `access[i]` yielding an assignable
// element (for example a SYCL read_write accessor or a plain pointer).
template <typename access_type>
class function
{
private:
	access_type access; // handle to the elements to be squared
	size_type size;     // number of elements to process
public:
	function(access_type access, size_type size):
		access{access},
		size{size}
	{}
	// Squares elements [0, size) one after another.
	// Const-qualified so the object can be invoked through a const reference,
	// which is how SYCL 2020 calls kernel function objects; the object's own
	// members are never modified, only the elements behind `access`.
	void operator()() const
	{
		for (size_type i=0; i<size; i++)
		{
			access[i] *= access[i];
		}
	}
};

int main()
{
	std::vector<float> vector{1.3, 2.3, 3.3, 4.3, 5.3};
	sycl::queue queue{sycl::gpu_selector{}};

	auto * buffer = new sycl::buffer<float, 1>{vector.data(), sycl::range<1>{vector.size()}};

	queue.submit(
		[&] (sycl::handler & handler)
		{
			auto access = buffer->get_access<sycl::access::mode::read_write>(handler);
			handler.single_task<class name>(
				function<decltype(access)>{access, vector.size()}
			);
		}
	);

	delete buffer;

	for (auto & x: vector) std::cout << x << ' '; // 1.69 5.29 10.89 18.49 28.09
	std::cout << std::endl;
}

SYCL Multiple Kernels

#include <SYCL/sycl.hpp>
#include <iostream>
#include <vector>

// Chains two kernels on one queue: the first copies v1 into v2, the second
// copies v2 into v3. The host then prints v3.
int main()
{
	std::vector<float> v1{1.3, 2.4, 3.5, 4.6, 5.7, 6.8};
	std::vector<float> v2(v1.size());
	std::vector<float> v3(v1.size());

	auto queue = sycl::queue{sycl::default_selector{}};

	{ // scope begins
		auto v1_buffer = sycl::buffer<float, 1> {v1.data(), sycl::range<1>{v1.size()}};
		auto v2_buffer = sycl::buffer<float, 1> {v2.data(), sycl::range<1>{v2.size()}};
		auto v3_buffer = sycl::buffer<float, 1> {v3.data(), sycl::range<1>{v3.size()}};

		// Read v1; Write v2
		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto v1_access = v1_buffer.get_access<sycl::access::mode::read>(handler);
				auto v2_access = v2_buffer.get_access<sycl::access::mode::write>(handler);

				handler.parallel_for<class v1_to_v2>(
					sycl::range<1>{v2.size()},
					[=] (sycl::id<1> id)
					{
						// Index with the id itself; copying id[0] into an int
						// would narrow the size_t work-item index.
						v2_access[id] = v1_access[id];
					}
				);
			}
		);

		// Read v2; Write v3
		queue.submit(
			[&] (sycl::handler & handler)
			{
				auto v2_access = v2_buffer.get_access<sycl::access::mode::read>(handler);
				auto v3_access = v3_buffer.get_access<sycl::access::mode::write>(handler);

				handler.parallel_for<class v2_to_v3>(
					sycl::range<1>{v3.size()},
					[=] (sycl::id<1> id)
					{
						v3_access[id] = v2_access[id];
					}
				);
			}
		);
	} // scope ends

	for (auto & x: v3) std::cout << x << ' '; // 1.3 2.4 3.5 4.6 5.7 6.8
	std::cout << std::endl;
}

Last revised: March 27, 2023 at 03:21:19 GMT


PrevUpHome