From 19043319d9e7fc66e963cfe3ad7ee74fc248f7b6 Mon Sep 17 00:00:00 2001 From: Drew Hubley Date: Sat, 4 Jan 2025 14:34:16 -0400 Subject: [PATCH] Added skeleton of batch based GPU assignment --- CMakeLists.txt | 1 + include/xtensor/xassign.hpp | 14 +++++- include/xtensor/xcontainer.hpp | 16 +++++++ include/xtensor/xdevice.hpp | 86 ++++++++++++++++++++++++++++++++++ include/xtensor/xfunction.hpp | 20 ++++++++ include/xtensor/xmath.hpp | 54 +++++++++++++++------ include/xtensor/xoperation.hpp | 5 ++ test/CMakeLists.txt | 1 + test/test_xdevice_assign.cpp | 39 +++++++++++++++ 9 files changed, 220 insertions(+), 16 deletions(-) create mode 100644 include/xtensor/xdevice.hpp create mode 100644 test/test_xdevice_assign.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 67c58f083..0eb7c079b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,7 @@ set(XTENSOR_HEADERS ${XTENSOR_INCLUDE_DIR}/xtensor/xcomplex.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xcontainer.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xcsv.hpp + ${XTENSOR_INCLUDE_DIR}/xtensor/xdevice.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xdynamic_view.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xeval.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xexception.hpp diff --git a/include/xtensor/xassign.hpp b/include/xtensor/xassign.hpp index 2ec698ae6..7c84c7a7c 100644 --- a/include/xtensor/xassign.hpp +++ b/include/xtensor/xassign.hpp @@ -168,6 +168,17 @@ namespace xt static void run_impl(E1& e1, const E2& e2, std::false_type); }; + class device_assigner + { + public: + + template + static void run(E1& e1, const E2& e2) + { + e1.store_device(e2.load_device()); + } + }; + /************************* * strided_loop_assigner * *************************/ @@ -463,7 +474,8 @@ namespace xt // in compilation error for expressions that do not provide a SIMD interface. // simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2) // is true. 
- linear_assigner::run(de1, de2); + //linear_assigner::run(de1, de2); + device_assigner::run(de1, de2); } else { diff --git a/include/xtensor/xcontainer.hpp b/include/xtensor/xcontainer.hpp index 46dd0cf1b..921c4b446 100644 --- a/include/xtensor/xcontainer.hpp +++ b/include/xtensor/xcontainer.hpp @@ -27,6 +27,7 @@ #include "xstrides.hpp" #include "xtensor_config.hpp" #include "xtensor_forward.hpp" +#include "xdevice.hpp" namespace xt { @@ -112,6 +113,8 @@ namespace xt using reverse_linear_iterator = typename iterable_base::reverse_linear_iterator; using const_reverse_linear_iterator = typename iterable_base::const_reverse_linear_iterator; + using container_device_return_type_t = host_device_batch; + static_assert(static_layout != layout_type::any, "Container layout can never be layout_type::any!"); size_type size() const noexcept; @@ -187,6 +190,19 @@ namespace xt container_simd_return_type_t /*simd_return_type*/ load_simd(size_type i) const; + template + void store_device(device_batch&& e) + { + //check length matching + e.store_host(storage().data()); + } + + container_device_return_type_t load_device() const + { + auto ptr = data(); + return container_device_return_type_t(ptr, size()); + } + linear_iterator linear_begin() noexcept; linear_iterator linear_end() noexcept; diff --git a/include/xtensor/xdevice.hpp b/include/xtensor/xdevice.hpp new file mode 100644 index 000000000..470e9376c --- /dev/null +++ b/include/xtensor/xdevice.hpp @@ -0,0 +1,86 @@ +#ifndef XTENSOR_DEVICE_HPP +#define XTENSOR_DEVICE_HPP + +#include +#include +#include +#include + +namespace xt{ + namespace detail{ + + } + /** + * Device implementation for the various operations. All device specific code goes in here disabled via macro + * for invalid syntax which might be needed for Sycl or CUDA. 
+ */ +//#ifdef XTENSOR_DEVICE_ASSIGN + template + class host_device_batch + { + public: + host_device_batch(const T* ptr, std::size_t size) + { + //copy the data to the device + //CUDA Impl = Nearly identical + m_data.resize(size); + std::copy(ptr, ptr + size, std::begin(m_data)); + } + template + host_device_batch& operator+(const host_device_batch& rhs) + { + //CUDA impl = thrust::transform(m_data.begin(), m_data.end(), rhs.m_data().begin(), m_data.end(), thrust::plus{}); + std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::plus{}); + return *this; + } + template + host_device_batch& operator-(const host_device_batch& rhs) + { + std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::minus{}); + return *this; + } + template + host_device_batch& operator*(const host_device_batch& rhs) + { + std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::multiplies{}); + return *this; + } + template + host_device_batch& operator/(const host_device_batch& rhs) + { + std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::divides{}); + return *this; + } + void store_host(T* dst) + { + std::copy(std::begin(m_data), std::end(m_data), dst); + } + private: + //CUDA impl = thrust::device_vector m_data; + std::vector m_data; + }; +//#endif + + // template + // class cuda_device_batch : public batch> + // { + // public: + + // }; + + // template + // class intel_device_batch : public batch> + // { + // public: + + // }; + + // template + // class opencl_device_batch : public batch> + // { + // public: + + // }; +} + +#endif diff --git a/include/xtensor/xfunction.hpp b/include/xtensor/xfunction.hpp index f11362cdb..c9d5362c0 100644 --- a/include/xtensor/xfunction.hpp +++ b/include/xtensor/xfunction.hpp @@ -31,6 +31,7 @@ #include "xstrides.hpp" #include "xtensor_simd.hpp" #include "xutils.hpp" 
+#include "xdevice.hpp" namespace xt { @@ -283,6 +284,7 @@ namespace xt using const_iterator = typename iterable_base::const_iterator; using reverse_iterator = typename iterable_base::reverse_iterator; using const_reverse_iterator = typename iterable_base::const_reverse_iterator; + using device_return_type = host_device_batch; template , self_type>::value>> xfunction(Func&& f, CTA&&... e) noexcept; @@ -361,6 +363,8 @@ namespace xt template ::size> simd_return_type load_simd(size_type i) const; + device_return_type load_device() const; + const tuple_type& arguments() const noexcept; const functor_type& functor() const noexcept; @@ -385,6 +389,9 @@ namespace xt template auto load_simd_impl(std::index_sequence, size_type i) const; + template + inline auto load_device_impl(std::index_sequence) const; + template const_stepper build_stepper(Func&& f, std::index_sequence) const noexcept; @@ -844,6 +851,12 @@ namespace xt return operator()(); } + template + inline auto xfunction::load_device() const -> device_return_type + { + return load_device_impl(std::make_index_sequence()); + } + template template inline auto xfunction::load_simd(size_type i) const -> simd_return_type @@ -912,6 +925,13 @@ namespace xt return m_f.simd_apply((std::get(m_e).template load_simd(i))...); } + template + template + inline auto xfunction::load_device_impl(std::index_sequence) const + { + return m_f.device_apply((std::get(m_e).load_device())...); + } + template template inline auto xfunction::build_stepper(Func&& f, std::index_sequence) const noexcept diff --git a/include/xtensor/xmath.hpp b/include/xtensor/xmath.hpp index d7ab0820a..2deae2bbf 100644 --- a/include/xtensor/xmath.hpp +++ b/include/xtensor/xmath.hpp @@ -81,21 +81,27 @@ namespace xt XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long long); -#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \ - struct NAME##_fun \ - { \ - template \ - constexpr auto operator()(const T& arg) const \ - { \ - using math::NAME; \ - return 
NAME(arg); \ } \ template \ constexpr auto simd_apply(const B& arg) const \ { \ using math::NAME; \ return NAME(arg); \ } \ +#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \ + struct NAME##_fun \ + { \ + template \ + constexpr auto operator()(const T& arg) const \ + { \ + using math::NAME; \ + return NAME(arg); \ + } \ + template \ + constexpr auto simd_apply(const B& arg) const \ + { \ + using math::NAME; \ + return NAME(arg); \ + } \ + template \ + constexpr auto device_apply(const B& arg) const \ + { \ + using math::NAME; \ + return NAME(arg); \ + } \ } #define XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(NAME) \ @@ -113,6 +119,12 @@ namespace xt using math::NAME; \ return NAME(arg); \ } \ + template \ + constexpr auto device_apply(const B& arg) const \ + { \ + using math::NAME; \ + return NAME(arg); \ + } \ } #define XTENSOR_BINARY_MATH_FUNCTOR(NAME) \ @@ -130,6 +142,12 @@ namespace xt using math::NAME; \ return NAME(arg1, arg2); \ } \ + template \ + constexpr auto device_apply(const B& arg1, const B& arg2) const \ + { \ + using math::NAME; \ + return NAME(arg1, arg2); \ + } \ } #define XTENSOR_TERNARY_MATH_FUNCTOR(NAME) \ @@ -147,6 +165,12 @@ namespace xt using math::NAME; \ return NAME(arg1, arg2, arg3); \ } \ + template \ + constexpr auto device_apply(const B& arg1, const B& arg2, const B& arg3) const \ + { \ + using math::NAME; \ + return NAME(arg1, arg2, arg3); \ + } \ } namespace math diff --git a/include/xtensor/xoperation.hpp b/include/xtensor/xoperation.hpp index 44d639130..4010a7e12 100644 --- a/include/xtensor/xoperation.hpp +++ b/include/xtensor/xoperation.hpp @@ -79,6 +79,11 @@ namespace xt { \ return (arg1 OP arg2); \ } \ + template \ + constexpr auto device_apply(B&& arg1, const B& arg2) const \ + { \ + return (arg1 OP arg2); \ + } \ } namespace detail diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 19a03dc4d..38659c3ad 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -183,6 +183,7 @@ set(XTENSOR_TESTS test_xcomplex.cpp 
test_xcsv.cpp test_xdatesupport.cpp + test_xdevice_assign.cpp test_xdynamic_view.cpp test_xfunctor_adaptor.cpp test_xfixed.cpp diff --git a/test/test_xdevice_assign.cpp b/test/test_xdevice_assign.cpp new file mode 100644 index 000000000..38468e29d --- /dev/null +++ b/test/test_xdevice_assign.cpp @@ -0,0 +1,39 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht * + * Copyright (c) QuantStack * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ +// Tests for the experimental device (batch) assignment path declared in xdevice.hpp. +// Hand-written test file; not generated from a *.cppy source. + + +#include + +#include "xtensor/xarray.hpp" +#include "xtensor/xfixed.hpp" +#include "xtensor/xnoalias.hpp" +#include "xtensor/xstrided_view.hpp" +#include "xtensor/xtensor.hpp" +#include "xtensor/xview.hpp" + +#include "test_common_macros.hpp" + +namespace xt +{ + TEST(test_xdevice, basic_xfunction) + { + std::vector expectation = {2,3,4,5,6}; + + xt::xarray a = {1., 2., 3., 4., 5.}; + xt::xarray b = xt::ones_like(a); + auto c = xt::xtensor::from_shape(a.shape()); + c = a + b; + for(size_t i = 0; i < expectation.size(); i++) + { + ASSERT_EQ(c(i), expectation.at(i)); + } + }
}