fortran-lang · jalvesz · Feb 27, 2025 · Dec 24, 2024 · Dec 24, 2024 · Dec 26, 2024
diff --git a/doc/specs/stdlib_intrinsics.md b/doc/specs/stdlib_intrinsics.md
@@ -0,0 +1,158 @@
+---
+title: intrinsics
+---
+
+# The `stdlib_intrinsics` module
+
+[TOC]
+
+## Introduction
+
+The `stdlib_intrinsics` module provides replacements for some of the well known intrinsic functions found in Fortran compilers for which either a faster and/or more accurate implementation is found which has also proven of interest to the Fortran community.
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `stdlib_sum` function
+
+#### Description
+
+The `stdlib_sum` function can replace the intrinsic `sum` for `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when summing large arrays, for repetitive summation of smaller arrays consider the classical `sum`.
+
+#### Syntax
+
+`res = ` [[stdlib_intrinsics(module):stdlib_sum(interface)]] ` (x [,mask] )`
+
+`res = ` [[stdlib_intrinsics(module):stdlib_sum(interface)]] ` (x, dim [,mask] )`
+
+#### Status
+
+Experimental
+
+#### Class
+
+Pure function.
+
+#### Argument(s)
+
+`x`: N-D array of either `real` or `complex` type. This argument is `intent(in)`.
+
+`dim` (optional): scalar of type `integer` with a value in the range from 1 to n, where n equals the rank of `x`.
+
+`mask` (optional): N-D array of `logical` values, with the same shape as `x`. This argument is `intent(in)`.
+
+#### Output value or Result value
+
+If `dim` is absent, the output is a scalar of the same `type` and `kind` as to that of `x`. Otherwise, an array of rank n-1, where n equals the rank of `x`, and a shape similar to that of `x` with dimension `dim` dropped is returned.
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `stdlib_sum_kahan` function
+
+#### Description
+
+The `stdlib_sum_kahan` function can replace the intrinsic `sum` for `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential complemented by an `elemental` kernel based on the [kahan summation](https://doi.org/10.1145%2F363707.363723) strategy to reduce the round-off error:
+
+```fortran
+elemental subroutine kahan_kernel_<kind>(a,s,c)
+    type(<kind>), intent(in) :: a
+    type(<kind>), intent(inout) :: s
+    type(<kind>), intent(inout) :: c
+    type(<kind>) :: t, y
+    y = a - c
+    t = s + y
+    c = (t - s) - y
+    s = t
+end subroutine
+```
+
+#### Syntax
+
+`res = ` [[stdlib_intrinsics(module):stdlib_sum_kahan(interface)]] ` (x [,mask] )`
+
+`res = ` [[stdlib_intrinsics(module):stdlib_sum_kahan(interface)]] ` (x, dim [,mask] )`
+
+#### Status
+
+Experimental
+
+#### Class
+
+Pure function.
+
+#### Argument(s)
+
+`x`: 1D array of either `real` or `complex` type. This argument is `intent(in)`.
+
+`dim` (optional): scalar of type `integer` with a value in the range from 1 to n, where n equals the rank of `x`.
+
+`mask` (optional): N-D array of `logical` values, with the same shape as `x`. This argument is `intent(in)`.
+
+#### Output value or Result value
+
+If `dim` is absent, the output is a scalar of the same `type` and `kind` as to that of `x`. Otherwise, an array of rank n-1, where n equals the rank of `x`, and a shape similar to that of `x` with dimension `dim` dropped is returned.
+
+#### Example
+
+```fortran
+{!example/intrinsics/example_sum.f90!}
+```
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `stdlib_dot_product` function
+
+#### Description
+
+The `stdlib_dot_product` function can replace the intrinsic `dot_product` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when crunching large arrays, for repetitive products of smaller arrays consider the classical `dot_product`.
-The `stdlib_dot_product` function can replace the intrinsic `dot_product` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when crunching large arrays, for repetitive products of smaller arrays consider the classical `dot_product`.
+The `stdlib_dot_product` function can replace the intrinsic `dot_product` for 1D `real`, `complex` or `integer` arrays. It follows a blocked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is the most efficient when operating on large arrays.
-The `stdlib_dot_product` function can replace the intrinsic `dot_product` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when crunching large arrays, for repetitive products of smaller arrays consider the classical `dot_product`.
+The `stdlib_dot_product` function can replace the intrinsic `dot_product` for 1D `real`, `complex` or `integer` arrays. It follows a blocked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is the most efficient when operating on large arrays.
+
+#### Syntax
+
+`res = ` [[stdlib_intrinsics(module):stdlib_dot_product(interface)]] ` (x, y)`
+
+#### Status
+
+Experimental
+
+#### Class
+
+Pure function.
+
+#### Argument(s)
+
+`x`: 1D array of either `real` or `complex` type. This argument is `intent(in)`.
+
+`y`: 1D array of the same type and kind as `x`. This argument is `intent(in)`.
+
+#### Output value or Result value
+
+The output is a scalar of `type` and `kind` same as to that of `x` and `y`.
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `stdlib_dot_product_kahan` function
+
+#### Description
+
+The `stdlib_dot_product_kahan` function can replace the intrinsic `dot_product` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential , complemented by the same `elemental` kernel based on the [kahan summation](https://doi.org/10.1145%2F363707.363723) used for `stdlib_sum` to reduce the round-off error.
+
+#### Syntax
+
+`res = ` [[stdlib_intrinsics(module):stdlib_dot_product_kahan(interface)]] ` (x, y)`
+
+#### Status
+
+Experimental
+
+#### Class
+
+Pure function.
+
+#### Argument(s)
+
+`x`: 1D array of either `real` or `complex` type. This argument is `intent(in)`.
+
+`y`: 1D array of the same type and kind as `x`. This argument is `intent(in)`.
+
+#### Output value or Result value
+
+The output is a scalar of `type` and `kind` same as to that of `x` and `y`.
+
+```fortran
+{!example/intrinsics/example_dot_product.f90!}
+```
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -13,6 +13,7 @@ add_subdirectory(constants)
 add_subdirectory(error)
 add_subdirectory(hashmaps)
 add_subdirectory(hash_procedures)
+add_subdirectory(intrinsics)
 add_subdirectory(io)
 add_subdirectory(linalg)
 add_subdirectory(logger)

diff --git a/example/intrinsics/CMakeLists.txt b/example/intrinsics/CMakeLists.txt
@@ -0,0 +1,2 @@
+ADD_EXAMPLE(sum)
+ADD_EXAMPLE(dot_product)
diff --git a/example/intrinsics/example_dot_product.f90 b/example/intrinsics/example_dot_product.f90
@@ -0,0 +1,18 @@
+program example_dot_product
+    use stdlib_kinds, only: sp
+    use stdlib_intrinsics, only: stdlib_dot_product, stdlib_dot_product_kahan
+    implicit none
+
+    real(sp), allocatable :: x(:), y(:)
+    real(sp) :: total_prod(3)
+
+    allocate( x(1000), y(1000) )
+    call random_number(x)
+    call random_number(y)
+
+    total_prod(1) = dot_product(x,y) !> compiler intrinsic
+    total_prod(2) = stdlib_dot_product(x,y)       !> chunked summation over inner product
+    total_prod(3) = stdlib_dot_product_kahan(x,y) !> chunked kahan summation over inner product
+    print *, total_prod(1:3)
+
+end program example_dot_product
diff --git a/example/intrinsics/example_sum.f90 b/example/intrinsics/example_sum.f90
@@ -0,0 +1,17 @@
+program example_sum
+    use stdlib_kinds, only: sp
+    use stdlib_intrinsics, only: stdlib_sum, stdlib_sum_kahan
+    implicit none
+
+    real(sp), allocatable :: x(:)
+    real(sp) :: total_sum(3)
+
+    allocate( x(1000) )
+    call random_number(x)
+
+    total_sum(1) = sum(x)       !> compiler intrinsic
+    total_sum(2) = stdlib_sum(x)      !> chunked summation
+    total_sum(3) = stdlib_sum_kahan(x)!> chunked kahan summation
+    print *, total_sum(1:3)
+
+end program example_sum
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -16,6 +16,9 @@ set(fppFiles
     stdlib_hash_64bit_fnv.fypp
     stdlib_hash_64bit_pengy.fypp
     stdlib_hash_64bit_spookyv2.fypp
+    stdlib_intrinsics_dot_product.fypp
+    stdlib_intrinsics_sum.fypp
+    stdlib_intrinsics.fypp
     stdlib_io.fypp
     stdlib_io_npy.fypp
     stdlib_io_npy_load.fypp

diff --git a/src/stdlib_constants.fypp b/src/stdlib_constants.fypp
@@ -1,5 +1,8 @@
 #:include "common.fypp"
 #:set KINDS = REAL_KINDS
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
+
 module stdlib_constants
     !! Constants
     !! ([Specification](../page/specs/stdlib_constants.html))
@@ -60,5 +63,13 @@ module stdlib_constants
     real(dp), parameter, public :: u = ATOMIC_MASS_CONSTANT%value !! Atomic mass constant
 
     ! Additional constants if needed
+    #:for k, t, s in R_KINDS_TYPES
+    ${t}$, parameter, public :: zero_${s}$ = 0._${k}$
+    ${t}$, parameter, public :: one_${s}$  = 1._${k}$
+    #:endfor
+    #:for k, t, s in C_KINDS_TYPES
+    ${t}$, parameter, public :: zero_${s}$ = (0._${k}$,0._${k}$)
+    ${t}$, parameter, public :: one_${s}$  = (1._${k}$,0._${k}$)
+    #:endfor
 
 end module stdlib_constants