From 5f8093457e366ab9a7d6e990a604c8cc7dcb6770 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:29:12 +0200 Subject: [PATCH 01/21] Introduce BlockSize concept --- cpp/dolfinx/common/types.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 318b1c86060..9ac581fca5f 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -41,4 +41,35 @@ using scalar_value_t = typename scalar_value::type; /// @private mdspan/mdarray namespace namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; +/// @private Constant of maximum compile time optimized block sizes. +constexpr int MaxOptimizedBlockSize = 3; + +/// @private Concept capturing both compile time defined block sizes and runtime +/// ones. +template +concept BlockSize + = std::is_same_v || (requires { + typename T::value_type; + requires std::is_same_v; + requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; + }); + +/// @private Check if block size is a compile time constant. +template +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if block size is a run time constant. +template +constexpr bool is_runtime_v = std::is_same_v; + +/// @private Retrieves the integral block size of a runtime or compile time +/// block size. +int block_size(BlockSize auto bs) +{ + if constexpr (is_compile_time_v) + return decltype(bs)::value; + + return bs; +} + } // namespace dolfinx From 65ff61f72128762b985ff18cdbe6b4fafecabb83 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:47:03 +0200 Subject: [PATCH 02/21] Use BlockSize in packing --- cpp/dolfinx/fem/pack.h | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 7d8a30f7ff6..2033bbb100d 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -12,6 +12,7 @@ #include "Form.h" #include "Function.h" #include "FunctionSpace.h" +#include "dolfinx/common/types.h" #include "traits.h" #include #include @@ -51,29 +52,19 @@ get_cell_orientation_info(const Function& coefficient) } /// Pack a single coefficient for a single cell -template -void pack_impl(std::span coeffs, std::int32_t cell, int bs, +template +void pack_impl(std::span coeffs, std::int32_t cell, BlockSize auto _bs, std::span v, std::span cell_info, const DofMap& dofmap, auto transform) { std::span dofs = dofmap.cell_dofs(cell); for (std::size_t i = 0; i < dofs.size(); ++i) { - if constexpr (_bs < 0) - { - const int pos_c = bs * i; - const int pos_v = bs * dofs[i]; - for (int k = 0; k < bs; ++k) - coeffs[pos_c + k] = v[pos_v + k]; - } - else - { - assert(_bs == bs); - const int pos_c = _bs * i; - const int pos_v = _bs * dofs[i]; - for (int k = 0; k < _bs; ++k) - coeffs[pos_c + k] = v[pos_v + k]; - } + int bs = block_size(_bs); + const int pos_c = bs * i; + const int pos_v = bs * dofs[i]; + for (int k = 0; k < bs; ++k) + coeffs[pos_c + k] = v[pos_v + k]; } transform(coeffs, cell_info, cell, 1); @@ -117,8 +108,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<1>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -128,8 +119,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<2>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -139,8 +130,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<3>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -150,8 +141,7 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<-1>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, bs, v, cell_info, dofmap, transformation); } } break; From a708e7d82b6973cfcf141e8d33ce5a872831a0b1 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 17:13:37 +0200 Subject: [PATCH 03/21] Use BlockSize in vector assembly --- cpp/dolfinx/fem/assemble_vector_impl.h | 234 ++++++++++--------------- 1 file changed, 88 insertions(+), 146 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 0e32e12fc23..d9ccc003bcf 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -10,6 +10,7 @@ #include "DirichletBC.h" #include "DofMap.h" #include "Form.h" +#include "dolfinx/common/types.h" #include "traits.h" #include "utils.h" #include @@ -23,6 +24,7 @@ #include #include #include +#include #include namespace dolfinx::fem @@ -76,16 +78,16 @@ using mdspan2_t = md::mdspan>; /// conditions applied. /// @param[in] x0 Vector used in the lifting. /// @param[in] alpha Scaling to apply. -template +template void _lift_bc_cells( std::span b, mdspan2_t x_dofmap, md::mdspan, md::extents> x, FEkernel auto kernel, std::span cells, - std::tuple> dofmap0, + std::tuple> dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple> dofmap1, fem::DofTransformKernel auto P1T, std::span constants, md::mdspan> coeffs, std::span cell_info0, @@ -95,10 +97,11 @@ void _lift_bc_cells( if (cells.empty()) return; - const auto [dmap0, bs0, cells0] = dofmap0; - const auto [dmap1, bs1, cells1] = dofmap1; - assert(_bs0 < 0 or _bs0 == bs0); - assert(_bs1 < 0 or _bs1 == bs1); + const auto [dmap0, _bs0, cells0] = dofmap0; + const auto [dmap1, _bs1, cells1] = dofmap1; + + int bs0 = block_size(_bs0); + int bs1 = block_size(_bs1); // Data structures used in bc application std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -120,28 +123,13 @@ void _lift_bc_cells( bool has_bc = false; for (std::size_t j = 0; j < dofs1.size(); ++j) { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - assert(_bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[_bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - else + for (int k = 0; k < bs1; ++k) { - for (int k = 0; k < bs1; ++k) + assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); + if (bc_markers1[bs1 * dofs1[j] + k]) { - assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } + has_bc = true; + break; } } } @@ -172,53 +160,27 @@ void _lift_bc_cells( std::ranges::fill(be, 0); for (std::size_t j = 0; j < dofs1.size(); ++j) { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - const std::int32_t jj = _bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // const T _x0 = 0; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + _bs1 * j + k] * alpha * (bc - _x0); - } - } - } - else + + for (int k = 0; k < bs1; ++k) { - for (int k = 0; k < bs1; ++k) + const std::int32_t jj = bs1 * dofs1[j] + k; + assert(jj < (int)bc_markers1.size()); + if (bc_markers1[jj]) { - const std::int32_t jj = bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } + const T bc = bc_values1[jj]; + const T _x0 = x0.empty() ? 0 : x0[jj]; + // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); + for (int m = 0; m < num_rows; ++m) + be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); } } } for (std::size_t i = 0; i < dofs0.size(); ++i) { - if constexpr (_bs0 > 0) - { - for (int k = 0; k < _bs0; ++k) - b[_bs0 * dofs0[i] + k] += be[_bs0 * i + k]; - } - else - { - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } + + for (int k = 0; k < bs0; ++k) + b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; } } } @@ -638,14 +600,14 @@ void _lift_bc_interior_facets( /// coefficient for cell `i`. /// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -template +template void assemble_cells( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, md::extents> x, std::span cells, - std::tuple> dofmap, + std::tuple> dofmap, FEkernel auto kernel, std::span constants, md::mdspan> coeffs, std::span cell_info0) @@ -653,8 +615,8 @@ void assemble_cells( if (cells.empty()) return; - const auto [dmap, bs, cells0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, cells0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -681,18 +643,9 @@ void assemble_cells( // Scatter cell vector to 'global' vector array auto dofs = md::submdspan(dmap, c0, md::full_extent); - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dofs[i] + k] += be[_bs * i + k]; - } - else - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dofs[i] + k] += be[bs * i + k]; - } + for (std::size_t i = 0; i < dofs.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dofs[i] + k] += be[bs * i + k]; } } @@ -720,7 +673,7 @@ void assemble_cells( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_exterior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, @@ -729,7 +682,7 @@ void assemble_exterior_facets( md::mdspan> facets, - std::tuple>> dofmap, @@ -741,8 +694,8 @@ void assemble_exterior_facets( if (facets.empty()) return; - const auto [dmap, bs, facets0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, facets0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly const int num_dofs = dmap.extent(1); @@ -775,18 +728,9 @@ void assemble_exterior_facets( // Add element vector to global vector auto dofs = md::submdspan(dmap, cell0, md::full_extent); - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dofs[i] + k] += be[_bs * i + k]; - } - else - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dofs[i] + k] += be[bs * i + k]; - } + for (std::size_t i = 0; i < dofs.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dofs[i] + k] += be[bs * i + k]; } } @@ -813,7 +757,7 @@ void assemble_exterior_facets( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, @@ -822,7 +766,7 @@ void assemble_interior_facets( md::mdspan> facets, - std::tuple>> dofmap, @@ -836,8 +780,8 @@ void assemble_interior_facets( if (facets.empty()) return; - const auto [dmap, bs, facets0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, facets0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly using X = scalar_value_t; @@ -886,24 +830,12 @@ void assemble_interior_facets( P0(sub_be, cell_info0, cells0[1], 1); // Add element vector to global vector - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dmap0.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dmap0[i] + k] += be[_bs * i + k]; - for (std::size_t i = 0; i < dmap1.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dmap1[i] + k] += be[_bs * (i + dmap0.size()) + k]; - } - else - { - for (std::size_t i = 0; i < dmap0.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dmap0[i] + k] += be[bs * i + k]; - for (std::size_t i = 0; i < dmap1.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dmap1[i] + k] += be[bs * (i + dmap0.size()) + k]; - } + for (std::size_t i = 0; i < dmap0.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dmap0[i] + k] += be[bs * i + k]; + for (std::size_t i = 0; i < dmap1.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dmap1[i] + k] += be[bs * (i + dmap0.size()) + k]; } } @@ -988,23 +920,28 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells( - b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, + std::integral_constant>( + b, x_dofmap, x, kernel, cells, + {dofmap0, std::integral_constant(), cells0}, P0, + {dofmap1, std::integral_constant(), cells1}, P1T, constants, + coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells( - b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, + std::integral_constant>( + b, x_dofmap, x, kernel, cells, + {dofmap0, std::integral_constant(), cells0}, P0, + {dofmap1, std::integral_constant(), cells1}, P1T, constants, + coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells( + b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } } @@ -1221,19 +1158,21 @@ void assemble_vector( assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, + {dofs, std::integral_constant(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { - impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, + {dofs, std::integral_constant(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else { - impl::assemble_cells( + impl::assemble_cells( P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } @@ -1269,21 +1208,24 @@ void assemble_vector( assert((facets.size() / 2) * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, + {dofs, std::integral_constant(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, + {dofs, std::integral_constant(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); @@ -1308,10 +1250,10 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, bs, + {*dofmap, std::integral_constant(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1319,10 +1261,10 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, bs, + {*dofmap, std::integral_constant(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1330,7 +1272,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap, bs, From 5b65ad8cab96e44b307ecb01721a0a6245175b0d Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 17:50:55 +0200 Subject: [PATCH 04/21] Adapt demo --- cpp/demo/custom_kernel/main.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index b49da2ca416..8df8b56d24a 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -167,9 +168,10 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells([](auto, auto, auto, auto) {}, - b.mutable_array(), g.dofmap(), x, cells, - {dofmap.map(), 1, cells}, kernel, {}, {}, {}); + fem::impl::assemble_cells>( + [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, + {dofmap.map(), std::integral_constant(), cells}, kernel, {}, {}, + {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } From 29a1219246bcba03ec7fa4aa4da8dd5488c5640a Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 19:44:36 +0200 Subject: [PATCH 05/21] Introduce BS<> alias --- cpp/demo/custom_kernel/main.cpp | 5 ++- cpp/dolfinx/common/types.h | 4 +++ cpp/dolfinx/fem/assemble_vector_impl.h | 48 +++++++++++--------------- cpp/dolfinx/fem/pack.h | 12 +++---- 4 files changed, 32 insertions(+), 37 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 8df8b56d24a..caf35e2ba44 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,10 +168,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells>( + fem::impl::assemble_cells>( [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, - {dofmap.map(), std::integral_constant(), cells}, kernel, {}, {}, - {}); + {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 9ac581fca5f..afa2d725b06 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -54,6 +54,10 @@ concept BlockSize requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; }); +/// @private Short notation for a compile time block size. +template +using BS = std::integral_constant; + /// @private Check if block size is a compile time constant. template constexpr bool is_compile_time_v = !std::is_same_v; diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index d9ccc003bcf..2df8ee1a78c 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -920,21 +920,17 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells, - std::integral_constant>( - b, x_dofmap, x, kernel, cells, - {dofmap0, std::integral_constant(), cells0}, P0, - {dofmap1, std::integral_constant(), cells1}, P1T, constants, - coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, BS<1>>( + b, x_dofmap, x, kernel, cells, {dofmap0, BS<1>(), cells0}, P0, + {dofmap1, BS<1>(), cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells, - std::integral_constant>( - b, x_dofmap, x, kernel, cells, - {dofmap0, std::integral_constant(), cells0}, P0, - {dofmap1, std::integral_constant(), cells1}, P1T, constants, - coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, BS<3>>( + b, x_dofmap, x, kernel, cells, {dofmap0, BS<3>(), cells0}, P0, + {dofmap1, BS<3>(), cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } else { @@ -1158,16 +1154,14 @@ void assemble_vector( assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells>( - P0, b, x_dofmap, x, cells, - {dofs, std::integral_constant(), cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, {dofs, BS<1>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { - impl::assemble_cells>( - P0, b, x_dofmap, x, cells, - {dofs, std::integral_constant(), cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, {dofs, BS<3>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else @@ -1209,17 +1203,15 @@ void assemble_vector( if (bs == 1) { - impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, - {dofs, std::integral_constant(), facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, - {dofs, std::integral_constant(), facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } @@ -1250,10 +1242,10 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, std::integral_constant(), + {*dofmap, BS<1>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1261,10 +1253,10 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, std::integral_constant(), + {*dofmap, BS<3>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 2033bbb100d..63e82ba0e07 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -108,8 +108,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<1>(), v, cell_info, dofmap, + transformation); } } break; @@ -119,8 +119,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<2>(), v, cell_info, dofmap, + transformation); } } break; @@ -130,8 +130,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<3>(), v, cell_info, dofmap, + transformation); } } break; From 10cc79c1bdea96017d666caa7b1ea41577e5cd82 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 19:46:06 +0200 Subject: [PATCH 06/21] Use BlockSize in spmv --- cpp/dolfinx/la/MatrixCSR.h | 17 +++++++++-------- cpp/dolfinx/la/matrix_csr_impl.h | 23 +++++++---------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/cpp/dolfinx/la/MatrixCSR.h b/cpp/dolfinx/la/MatrixCSR.h index 104fb8edb25..3bfa269684e 100644 --- a/cpp/dolfinx/la/MatrixCSR.h +++ b/cpp/dolfinx/la/MatrixCSR.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -788,13 +789,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[0] * xi[0] if (_bs[1] == 1) { - impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, - _bs[0], 1); + impl::spmv>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, + _y, _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, - _bs[0], _bs[1]); + impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, + _y, _bs[0], _bs[1]); } // finalize ghost update @@ -804,13 +805,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[1] * xi[1] if (_bs[1] == 1) { - impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, - _bs[0], 1); + impl::spmv>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, + _y, _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, - _bs[0], _bs[1]); + impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, + _bs[0], _bs[1]); } } diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h index 143b6c0d4f8..29f9ffde6fa 100644 --- a/cpp/dolfinx/la/matrix_csr_impl.h +++ b/cpp/dolfinx/la/matrix_csr_impl.h @@ -6,6 +6,7 @@ #pragma once +#include "dolfinx/common/types.h" #include #include #include @@ -222,12 +223,13 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr, /// @param y /// @param bs0 /// @param bs1 -template +template void spmv(std::span values, std::span row_begin, std::span row_end, std::span indices, std::span x, - std::span y, int bs0, int bs1) + std::span y, int bs0, BS1 _bs1) { + int bs1 = block_size(_bs1); assert(row_begin.size() == row_end.size()); for (int k0 = 0; k0 < bs0; ++k0) { @@ -236,21 +238,10 @@ void spmv(std::span values, std::span row_begin, T vi{0}; for (std::int32_t j = row_begin[i]; j < row_end[i]; j++) { - if constexpr (BS1 == -1) + for (int k1 = 0; k1 < bs1; ++k1) { - for (int k1 = 0; k1 < bs1; ++k1) - { - vi += values[j * bs1 * bs0 + k1 * bs0 + k0] - * x[indices[j] * bs1 + k1]; - } - } - else - { - for (int k1 = 0; k1 < BS1; ++k1) - { - vi += values[j * BS1 * bs0 + k1 * bs0 + k0] - * x[indices[j] * BS1 + k1]; - } + vi += values[j * bs1 * bs0 + k1 * bs0 + k0] + * x[indices[j] * bs1 + k1]; } } From 1fb65d474a7e7d4f947776372504bf707614008b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 20:44:19 +0200 Subject: [PATCH 07/21] doc --- cpp/dolfinx/la/matrix_csr_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h index 29f9ffde6fa..3a1d625b611 100644 --- a/cpp/dolfinx/la/matrix_csr_impl.h +++ b/cpp/dolfinx/la/matrix_csr_impl.h @@ -222,7 +222,7 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr, /// @param x /// @param y /// @param bs0 -/// @param bs1 +/// @param _bs1 template void spmv(std::span values, std::span row_begin, std::span row_end, From dcfef33dd03c229f7b85445c3156543f0bcb1a17 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 01:38:46 +0200 Subject: [PATCH 08/21] Introduce generic ConstexprType --- cpp/dolfinx/common/types.h | 39 ++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index afa2d725b06..dc66cd0cc19 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -44,33 +44,48 @@ namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; /// @private Constant of maximum compile time optimized block sizes. constexpr int MaxOptimizedBlockSize = 3; +/// @private Concept defining a variadic compile time or runtime variable. T +/// indicates the type that is stored and V the value. Either V equals T, i.e. +/// it is a runtime variable or V defines a compile time value V::value of type +/// T. +template +concept ConstexprType = std::is_same_v || (requires { + typename V::value_type; + requires std::is_same_v; + }); + +/// @private Check if ConstexprType holds a compile time constant. +template + requires ConstexprType +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if ConstexprType holds a run time variable. +template + requires ConstexprType +constexpr bool is_runtime_v = std::is_same_v; + /// @private Concept capturing both compile time defined block sizes and runtime /// ones. -template -concept BlockSize - = std::is_same_v || (requires { - typename T::value_type; - requires std::is_same_v; - requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; - }); +template +concept BlockSize = ConstexprType; /// @private Short notation for a compile time block size. template using BS = std::integral_constant; /// @private Check if block size is a compile time constant. -template -constexpr bool is_compile_time_v = !std::is_same_v; +template +constexpr bool is_compile_time_bs_v = is_compile_time_v; /// @private Check if block size is a run time constant. -template -constexpr bool is_runtime_v = std::is_same_v; +template +constexpr bool is_runtime_bs_v = is_runtime_v; /// @private Retrieves the integral block size of a runtime or compile time /// block size. int block_size(BlockSize auto bs) { - if constexpr (is_compile_time_v) + if constexpr (is_compile_time_bs_v) return decltype(bs)::value; return bs; From b8b0f9025a500fdbb84f003ddde35c841aebde68 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:05:09 +0200 Subject: [PATCH 09/21] value() --- cpp/dolfinx/common/types.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index dc66cd0cc19..9bff16698fa 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -64,6 +64,16 @@ template requires ConstexprType constexpr bool is_runtime_v = std::is_same_v; +template + requires ConstexprType +T value(V container) +{ + if constexpr (is_compile_time_v) + return V::value; + + return container; +} + /// @private Concept capturing both compile time defined block sizes and runtime /// ones. template @@ -85,10 +95,7 @@ constexpr bool is_runtime_bs_v = is_runtime_v; /// block size. int block_size(BlockSize auto bs) { - if constexpr (is_compile_time_bs_v) - return decltype(bs)::value; - - return bs; + return value(bs); } } // namespace dolfinx From 152e8d058bb40faa885648af7e6d959a80704603 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:05:25 +0200 Subject: [PATCH 10/21] Add test case --- cpp/test/CMakeLists.txt | 1 + cpp/test/common/constexpr_type.cpp | 38 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 cpp/test/common/constexpr_type.cpp diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 353681c32cb..bb8db312bb6 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -50,6 +50,7 @@ add_executable( matrix.cpp io.cpp common/CIFailure.cpp + common/constexpr_type.cpp common/sub_systems_manager.cpp common/index_map.cpp common/sort.cpp diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp new file mode 100644 index 00000000000..5c91a97eef1 --- /dev/null +++ b/cpp/test/common/constexpr_type.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2025 Paul T. Kühner +// +// This file is part of DOLFINx (https://www.fenicsproject.org) +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include + +#include +#include +#include +#include + +namespace +{ +template +void test() +{ + using V_runtime = T; + static_assert(!dolfinx::is_compile_time_v); + static_assert(dolfinx::is_runtime_v); + assert((dolfinx::value(V_runtime(1)) == T(1))); + + using V_compile_time = std::integral_constant; + static_assert(dolfinx::is_compile_time_v); + static_assert(!dolfinx::is_runtime_v); + assert((dolfinx::value(V_compile_time()) == T(1))); +} +} // namespace + +TEST_CASE("Test constexpr type", "[constexpr_type]") +{ + test(); + test(); + test(); + test(); + test(); +} \ No newline at end of file From 0e2ad158fc18538d7dcb3d915d90b89c8c6b0647 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:07:05 +0200 Subject: [PATCH 11/21] format --- cpp/dolfinx/common/types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 9bff16698fa..3774d1bb4a8 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -93,9 +93,6 @@ constexpr bool is_runtime_bs_v = is_runtime_v; /// @private Retrieves the integral block size of a runtime or compile time /// block size. -int block_size(BlockSize auto bs) -{ - return value(bs); -} +int block_size(BlockSize auto bs) { return value(bs); } } // namespace dolfinx From 31a146d98e0ac587d3c3c2d504fd1dadbcbeeb34 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:22:45 +0200 Subject: [PATCH 12/21] constexpr value access --- cpp/dolfinx/common/types.h | 10 +++++++--- cpp/test/common/constexpr_type.cpp | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 3774d1bb4a8..1e57e6872c1 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -66,11 +66,15 @@ constexpr bool is_runtime_v = std::is_same_v; template requires ConstexprType -T value(V container) +constexpr T value(V container, typename std::enable_if_t>* = 0) { - if constexpr (is_compile_time_v) - return V::value; + return V::value; +} +template + requires ConstexprType +T value(V container, typename std::enable_if_t>* = 0) +{ return container; } diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index 5c91a97eef1..196d5ab1209 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -24,7 +24,7 @@ void test() using V_compile_time = std::integral_constant; static_assert(dolfinx::is_compile_time_v); static_assert(!dolfinx::is_runtime_v); - assert((dolfinx::value(V_compile_time()) == T(1))); + static_assert((dolfinx::value(V_compile_time()) == T(1))); } } // namespace From 6a4d5b59d03fe3318ad142da4c45894cd2830490 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:24:12 +0200 Subject: [PATCH 13/21] format --- cpp/dolfinx/common/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 1e57e6872c1..509c9944650 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -66,7 +66,8 @@ constexpr bool is_runtime_v = std::is_same_v; template requires ConstexprType -constexpr T value(V container, typename std::enable_if_t>* = 0) +constexpr T value(V container, + typename std::enable_if_t>* = 0) { return V::value; } From 2652fb5b20e195c0c5e3c260b4a27d18d4267307 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:40:48 +0200 Subject: [PATCH 14/21] Bump PETSc/SLEPc --- docker/Dockerfile.redhat | 2 +- docker/Dockerfile.test-env | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.redhat b/docker/Dockerfile.redhat index a07786255eb..8b512027d71 100644 --- a/docker/Dockerfile.redhat +++ b/docker/Dockerfile.redhat @@ -8,7 +8,7 @@ FROM rockylinux/rockylinux:9 ARG BUILD_NP=4 ARG HDF5_VERSION=1.14.6 -ARG PETSC_VERSION=3.23.0 +ARG PETSC_VERSION=3.23.1 ARG MPICH_VERSION=4.2.3 WORKDIR /tmp diff --git a/docker/Dockerfile.test-env b/docker/Dockerfile.test-env index 96203cf1fff..9b622e1839b 100644 --- a/docker/Dockerfile.test-env +++ b/docker/Dockerfile.test-env @@ -22,8 +22,8 @@ ARG KAHIP_VERSION=3.18 # the most recent Numba release, see # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information ARG NUMPY_VERSION=2.1.3 -ARG PETSC_VERSION=3.23.0 -ARG SLEPC_VERSION=3.23.0 +ARG PETSC_VERSION=3.23.1 +ARG SLEPC_VERSION=3.23.1 ARG SPDLOG_VERSION=1.15.1 ARG MPICH_VERSION=4.2.3 From c762822f69604dfe476397f7752963e1cdc9478a Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:43:15 +0200 Subject: [PATCH 15/21] Revert "Bump PETSc/SLEPc" This reverts commit 2652fb5b20e195c0c5e3c260b4a27d18d4267307. --- docker/Dockerfile.redhat | 2 +- docker/Dockerfile.test-env | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.redhat b/docker/Dockerfile.redhat index 8b512027d71..a07786255eb 100644 --- a/docker/Dockerfile.redhat +++ b/docker/Dockerfile.redhat @@ -8,7 +8,7 @@ FROM rockylinux/rockylinux:9 ARG BUILD_NP=4 ARG HDF5_VERSION=1.14.6 -ARG PETSC_VERSION=3.23.1 +ARG PETSC_VERSION=3.23.0 ARG MPICH_VERSION=4.2.3 WORKDIR /tmp diff --git a/docker/Dockerfile.test-env b/docker/Dockerfile.test-env index 9b622e1839b..96203cf1fff 100644 --- a/docker/Dockerfile.test-env +++ b/docker/Dockerfile.test-env @@ -22,8 +22,8 @@ ARG KAHIP_VERSION=3.18 # the most recent Numba release, see # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information ARG NUMPY_VERSION=2.1.3 -ARG PETSC_VERSION=3.23.1 -ARG SLEPC_VERSION=3.23.1 +ARG PETSC_VERSION=3.23.0 +ARG SLEPC_VERSION=3.23.0 ARG SPDLOG_VERSION=1.15.1 ARG MPICH_VERSION=4.2.3 From 796725cf935e29c382d81f6831e5186eb4f4ed8b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 23:20:40 +0200 Subject: [PATCH 16/21] Tidy up --- cpp/dolfinx/common/CMakeLists.txt | 1 + cpp/dolfinx/common/constexpr_type.h | 53 ++++++++++++++++++++++++ cpp/dolfinx/common/types.h | 62 +++++++---------------------- cpp/test/common/constexpr_type.cpp | 16 ++++---- 4 files changed, 78 insertions(+), 54 deletions(-) create mode 100644 cpp/dolfinx/common/constexpr_type.h diff --git a/cpp/dolfinx/common/CMakeLists.txt b/cpp/dolfinx/common/CMakeLists.txt index 3a5c77044e0..20d486e56fc 100644 --- a/cpp/dolfinx/common/CMakeLists.txt +++ b/cpp/dolfinx/common/CMakeLists.txt @@ -1,5 +1,6 @@ set(HEADERS_common ${CMAKE_CURRENT_SOURCE_DIR}/defines.h + ${CMAKE_CURRENT_SOURCE_DIR}/constexpr_type.h ${CMAKE_CURRENT_SOURCE_DIR}/dolfinx_common.h ${CMAKE_CURRENT_SOURCE_DIR}/dolfinx_doc.h ${CMAKE_CURRENT_SOURCE_DIR}/IndexMap.h diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h new file mode 100644 index 00000000000..4e7e656bb7e --- /dev/null +++ b/cpp/dolfinx/common/constexpr_type.h @@ -0,0 +1,53 @@ +// Copyright (C) 2025 Paul T. Kühner +// +// This file is part of DOLFINx (https://www.fenicsproject.org) +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#pragma once + +#include + +namespace dolfinx::common +{ +/// @private Concept defining a variadic compile time or runtime variable. T +/// indicates the type that is stored and V the value. Either V equals T, i.e. +/// it is a runtime variable or V defines a compile time value V::value of type +/// T. +/// @tparam T type of the value to be stored. +/// @tparam V container type. Usually T for a runtime variable or a +/// std::integral_constant for a compile time constant. +template +concept ConstexprType = std::is_same_v || (requires { + typename V::value_type; + requires std::is_same_v; + }); + +/// @private Check if ConstexprType holds a compile time constant. +template + requires ConstexprType +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if ConstexprType holds a run time variable. +template + requires ConstexprType +constexpr bool is_runtime_v = std::is_same_v; + +/// @private Retrieve value of a compile time constant form a ConstexprType. +template + requires ConstexprType +constexpr T value(V container, + typename std::enable_if_t>* = 0) +{ + return V::value; +} + +/// @private Retrieve value of runtime variable form a ConstexprType. +template + requires ConstexprType +T value(V container, typename std::enable_if_t>* = 0) +{ + return container; +} + +} // namespace dolfinx::common diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 509c9944650..8bbcdf5a208 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace dolfinx @@ -41,63 +42,30 @@ using scalar_value_t = typename scalar_value::type; /// @private mdspan/mdarray namespace namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; -/// @private Constant of maximum compile time optimized block sizes. -constexpr int MaxOptimizedBlockSize = 3; - -/// @private Concept defining a variadic compile time or runtime variable. T -/// indicates the type that is stored and V the value. Either V equals T, i.e. -/// it is a runtime variable or V defines a compile time value V::value of type -/// T. -template -concept ConstexprType = std::is_same_v || (requires { - typename V::value_type; - requires std::is_same_v; - }); - -/// @private Check if ConstexprType holds a compile time constant. -template - requires ConstexprType -constexpr bool is_compile_time_v = !std::is_same_v; - -/// @private Check if ConstexprType holds a run time variable. -template - requires ConstexprType -constexpr bool is_runtime_v = std::is_same_v; - -template - requires ConstexprType -constexpr T value(V container, - typename std::enable_if_t>* = 0) -{ - return V::value; -} - -template - requires ConstexprType -T value(V container, typename std::enable_if_t>* = 0) -{ - return container; -} - /// @private Concept capturing both compile time defined block sizes and runtime /// ones. template -concept BlockSize = ConstexprType; +concept BlockSize = common::ConstexprType; /// @private Short notation for a compile time block size. template using BS = std::integral_constant; -/// @private Check if block size is a compile time constant. +/// @private Retrieves the integral block size of a compile time block size. template -constexpr bool is_compile_time_bs_v = is_compile_time_v; +constexpr int +block_size(V bs, + typename std::enable_if_t>* = 0) +{ + return common::value(bs); +} -/// @private Check if block size is a run time constant. +/// @private Retrieves the integral block size of a runtime block size. template -constexpr bool is_runtime_bs_v = is_runtime_v; - -/// @private Retrieves the integral block size of a runtime or compile time -/// block size. -int block_size(BlockSize auto bs) { return value(bs); } +int block_size(V bs, + typename std::enable_if_t>* = 0) +{ + return common::value(bs); +} } // namespace dolfinx diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index 196d5ab1209..c2bd6189a1a 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -8,23 +8,25 @@ #include #include -#include +#include #include +using namespace dolfinx::common; + namespace { template void test() { using V_runtime = T; - static_assert(!dolfinx::is_compile_time_v); - static_assert(dolfinx::is_runtime_v); - assert((dolfinx::value(V_runtime(1)) == T(1))); + static_assert(!is_compile_time_v); + static_assert(is_runtime_v); + assert((value(V_runtime(1)) == T(1))); using V_compile_time = std::integral_constant; - static_assert(dolfinx::is_compile_time_v); - static_assert(!dolfinx::is_runtime_v); - static_assert((dolfinx::value(V_compile_time()) == T(1))); + static_assert(is_compile_time_v); + static_assert(!is_runtime_v); + static_assert((value(V_compile_time()) == T(1))); } } // namespace From 460b35034d296dc748cbd85182b7d38cdc7240bd Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 1 May 2025 01:51:50 +0200 Subject: [PATCH 17/21] Compiler limitation for floating point values --- cpp/dolfinx/common/constexpr_type.h | 2 +- cpp/test/common/constexpr_type.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4e7e656bb7e..4051ef54034 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -constexpr T value(V container, +constexpr T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index c2bd6189a1a..e88314ef730 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -35,6 +35,11 @@ TEST_CASE("Test constexpr type", "[constexpr_type]") test(); test(); test(); + +// is C++ 20, but some compilers do not fully support, see +// https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L +#if defined(__cpp_nontype_template_args) test(); test(); +#endif } \ No newline at end of file From 5c1d722bd145b42cd9abe028571da507c3ed6d5b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 1 May 2025 02:13:52 +0200 Subject: [PATCH 18/21] Misses year code --- cpp/test/common/constexpr_type.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index e88314ef730..b9a4fc66bcf 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -38,7 +38,8 @@ TEST_CASE("Test constexpr type", "[constexpr_type]") // is C++ 20, but some compilers do not fully support, see // https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L -#if defined(__cpp_nontype_template_args) +#if defined(__cpp_nontype_template_args) \ + && __cpp_nontype_template_args >= 201911L test(); test(); #endif From 5f3d563409485f9870ed153ac6b34e371bc7c00e Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:40:09 +0200 Subject: [PATCH 19/21] Even better --- cpp/dolfinx/common/constexpr_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4051ef54034..4ddf7ade352 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -constexpr T value(V /* container */, +consteval T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; From 6f6cb9bd56ebf05971c4199f918460deb596e8dd Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:47:43 +0200 Subject: [PATCH 20/21] Missed one --- cpp/dolfinx/common/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 8bbcdf5a208..2ed7ed57924 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -53,7 +53,7 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template -constexpr int +consteval int block_size(V bs, typename std::enable_if_t>* = 0) { From c50ce564bf304dd30bd9231a544daadf94aadbcc Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:47:28 +0200 Subject: [PATCH 21/21] Revert to constexpr --- cpp/dolfinx/common/constexpr_type.h | 2 +- cpp/dolfinx/common/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4ddf7ade352..4051ef54034 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -consteval T value(V /* container */, +constexpr T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 2ed7ed57924..8bbcdf5a208 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -53,7 +53,7 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template -consteval int +constexpr int block_size(V bs, typename std::enable_if_t>* = 0) {