Skip to content

Commit 18e67e1

Browse files
committed
Replace locking by atomic operations
`s_warray_init()` and `s_warray_free()` are not safe and MUST NOT be called from multiple threads. This also removes `MP_WARRAY_NUM`, since automatic initialization will not be safe for more than one thread. Signed-off-by: Steffen Jaeckel <[email protected]>
1 parent bd66fcc commit 18e67e1

10 files changed

+28
-115
lines changed

.github/workflows/main.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,7 @@ jobs:
7272

7373
# Build with small stack-size
7474
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
75-
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --cflags=-DMP_NO_LOCKING', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
7675
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }
77-
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --cflags=-DMP_TEST_LOCKING', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }
7876

7977
# Test "autotuning", the automatic evaluation and setting of the Toom-Cook cut-offs.
8078
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_16BIT --limit-valgrind --make-option=tune'

demo/test.c

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,21 +2451,6 @@ static int test_mp_pack_unpack(void)
24512451
return EXIT_FAILURE;
24522452
}
24532453

2454-
2455-
#ifdef MP_TEST_LOCKING
2456-
#ifdef MP_NO_LOCKING
2457-
#error "Can't test locking when locking is disabled"
2458-
#endif
2459-
static mp_lock lock_ctx;
2460-
static int noop_lock_unlock(void *ctx)
2461-
{
2462-
EXPECT(ctx == &lock_ctx);
2463-
return 0;
2464-
LBL_ERR:
2465-
return -1;
2466-
}
2467-
#endif
2468-
24692454
#ifndef LTM_TEST_DYNAMIC
24702455
#define ONLY_PUBLIC_API_C
24712456
#endif
@@ -2540,14 +2525,6 @@ static int unit_tests(int argc, char **argv)
25402525
unsigned long i, ok, fail, nop;
25412526
uint64_t t;
25422527
int j;
2543-
#ifdef MP_TEST_LOCKING
2544-
lock_ctx.lock = noop_lock_unlock;
2545-
lock_ctx.unlock = noop_lock_unlock;
2546-
lock_ctx.ctx = &lock_ctx;
2547-
2548-
if (mp_warray_init(MP_WARRAY_NUM, true, &lock_ctx) != MP_OKAY)
2549-
return EXIT_FAILURE;
2550-
#endif
25512528
ok = fail = nop = 0;
25522529

25532530
t = (uint64_t)time(NULL);

doc/bn.tex

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -357,14 +357,10 @@ \subsection{Small-Stack option}
357357
The library can be compiled with the symbol \texttt{MP\_SMALL\_STACK\_SIZE} defined, which results in
358358
the temporary \texttt{MP\_WARRAY}-sized stack buffers being put on the heap.
359359
This comes with one problem, namely: formerly promised thread-safety isn't given anymore.
360-
Therefore if the Small-Stack option is enabled while doing multi threading, the provided locking
361-
mechanism shall be used.
362-
For some use cases it can be desired to use the Small-Stack option, but there are no threads and
363-
therefore we provide the possibility to disable locking by defining the symbol \texttt{MP\_NO\_LOCKING}.
360+
Therefore if the Small-Stack option is enabled while doing multi threading, one shall always initialize
361+
the library by calling \texttt{mp\_warray\_init()} once with the correct number of threads.
364362

365-
In case one already knows how many threads must be supported, the symbol \texttt{MP\_WARRAY\_NUM} can
366-
be useful. It can be pre-defined at compile time to the number of heap buffers created on automatic
367-
initialisation. C.f. \ref{ch:SMALL_STACK_API} for the dynamic API and further details.
363+
C.f. \ref{ch:SMALL_STACK_API} for the API description and further details.
368364

369365
\section{Purpose of LibTomMath}
370366
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath
@@ -443,8 +439,10 @@ \section{Building Programs}
443439
In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library
444440
file (typically
445441
libtommath.a). There is no library initialization required and the entire library is thread safe
446-
if it is used in its default configuration. Locking is recommended if the small-stack option
447-
is enabled and multiple threads are used, c.f. \ref{ch:SMALL_STACK_INTRO} resp. \ref{ch:SMALL_STACK_API}
442+
if it is used in its default configuration. The small-stack option makes use of atomic operations
443+
to maintain its internal state and therefore does not require locking, but it MUST be initialized
444+
if used from multiple threads. For further information see \ref{ch:SMALL_STACK_INTRO} resp.
445+
\ref{ch:SMALL_STACK_API}.
448446

449447
\section{Return Codes}
450448
There are five possible return codes a function may return.
@@ -839,27 +837,12 @@ \section{Small-Stack option}
839837

840838
\index{mp\_warray\_init}
841839
\begin{alltt}
842-
mp_err mp_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock);
840+
mp_err mp_warray_init(size_t n_alloc, bool preallocate);
843841
\end{alltt}
844842

845843
The flag \texttt{preallocate} controls whether the internal buffers --
846844
\texttt{n\_alloc} buffers of size \texttt{MP\_WARRAY} -- will be allocated when
847845
\texttt{mp\_warray\_init()} is called, or whether they will be allocated when required.
848-
The \texttt{mp\_lock} struct looks as follows and shall be used to protect the
849-
internal structure when using the library in a multi-threaded application.
850-
851-
\index{mp\_lock}
852-
\begin{alltt}
853-
typedef struct {
854-
int (*lock)(void *ctx);
855-
int (*unlock)(void *ctx);
856-
void *ctx;
857-
} mp_lock;
858-
\end{alltt}
859-
860-
The \texttt{mp\_lock.lock} resp. \texttt{mp\_lock.unlock} functions will be called before resp.
861-
after modifying the internal struct.
862-
The \texttt{mp\_lock.ctx} element will be passed to those functions.
863846

864847
To free the internally allocated memory the following function shall be called.
865848

mp_warray_free.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ static int s_warray_free(void)
1010
{
1111
int ret = 0;
1212
size_t n;
13-
S_MP_WARRAY_LOCK();
1413
for (n = 0; n < s_mp_warray.allocated; ++n) {
1514
if (s_mp_warray.l_used[n].warray) {
1615
ret = -2;
@@ -23,7 +22,6 @@ static int s_warray_free(void)
2322
}
2423
s_mp_warray_free(s_mp_warray.usable);
2524
ERR_OUT:
26-
S_MP_WARRAY_UNLOCK();
2725
return ret;
2826
}
2927

mp_warray_init.c

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,13 @@
33
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
44
/* SPDX-License-Identifier: Unlicense */
55

6-
static mp_err s_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock)
6+
static mp_err s_warray_init(size_t n_alloc, bool preallocate)
77
{
88
size_t n;
99
if (s_mp_warray.l_free != NULL || s_mp_warray.l_used != NULL) {
1010
return MP_VAL;
1111
}
1212

13-
if (MP_HAS(MP_USE_LOCKING) && (lock != NULL)) {
14-
if (lock->lock == NULL || lock->unlock == NULL)
15-
return MP_VAL;
16-
s_mp_warray.lock = *lock;
17-
s_mp_warray.locking_enabled = true;
18-
} else {
19-
s_mp_zero_buf(&s_mp_warray.lock, sizeof(s_mp_warray.lock));
20-
}
21-
2213
s_mp_warray.l_free = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_free)));
2314
s_mp_warray.l_used = MP_CALLOC(n_alloc, sizeof(*(s_mp_warray.l_used)));
2415
if (s_mp_warray.l_free == NULL || s_mp_warray.l_used == NULL) {
@@ -46,9 +37,9 @@ static mp_err s_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock)
4637
return MP_OKAY;
4738
}
4839

49-
mp_err mp_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock)
40+
mp_err mp_warray_init(size_t n_alloc, bool preallocate)
5041
{
51-
if (MP_HAS(MP_SMALL_STACK_SIZE)) return s_warray_init(n_alloc, preallocate, lock);
42+
if (MP_HAS(MP_SMALL_STACK_SIZE)) return s_warray_init(n_alloc, preallocate);
5243
return MP_ERR;
5344
}
5445

s_mp_warray_get.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,27 @@ void *s_mp_warray_get(void)
77
{
88
void *ret = NULL;
99
size_t n;
10-
S_MP_WARRAY_LOCK();
1110
if (s_mp_warray.usable == 0) {
12-
if (mp_warray_init(MP_WARRAY_NUM, false, NULL) != MP_OKAY)
11+
if (mp_warray_init(1, false) != MP_OKAY)
1312
return NULL;
1413
}
1514
for (n = 0; n < s_mp_warray.allocated; ++n) {
16-
if (s_mp_warray.l_free[n].warray) {
17-
s_mp_warray.l_used[n] = s_mp_warray.l_free[n];
18-
s_mp_warray.l_free[n].warray = NULL;
19-
ret = s_mp_warray.l_used[n].warray;
15+
if (s_mp_warray.l_free[n].warray == NULL)
16+
continue;
17+
ret = s_mp_warray.l_free[n].warray;
18+
if (MP_CMPEXCH(&s_mp_warray.l_free[n].warray, &ret, NULL)) {
19+
s_mp_warray.l_used[n].warray = ret;
2020
goto LBL_OUT;
2121
}
2222
}
23+
ret = NULL;
2324
if (s_mp_warray.allocated + 1 > s_mp_warray.usable)
2425
goto LBL_OUT;
2526
ret = MP_CALLOC(MP_WARRAY, sizeof(mp_word));
26-
s_mp_warray.l_used[s_mp_warray.allocated++].warray = ret;
27+
if (ret != NULL)
28+
s_mp_warray.l_used[s_mp_warray.allocated++].warray = ret;
2729

2830
LBL_OUT:
29-
S_MP_WARRAY_UNLOCK();
3031
return ret;
3132
}
3233

s_mp_warray_put.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@
55

66
void s_mp_warray_put(void *w)
77
{
8-
size_t n;
9-
S_MP_WARRAY_LOCK();
10-
for (n = 0; n < s_mp_warray.allocated; ++n) {
8+
size_t n, allocated = s_mp_warray.allocated;
9+
for (n = 0; n < allocated; ++n) {
1110
if (s_mp_warray.l_used[n].warray == w) {
12-
s_mp_warray.l_free[n] = s_mp_warray.l_used[n];
1311
s_mp_warray.l_used[n].warray = NULL;
12+
s_mp_warray.l_free[n].warray = w;
1413
break;
1514
}
1615
}
17-
S_MP_WARRAY_UNLOCK();
1816
}
1917

2018
#endif

tommath.h

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -78,25 +78,6 @@ typedef uint32_t mp_digit;
7878
#define MP_MASK ((((mp_digit)1)<<((mp_digit)MP_DIGIT_BIT))-((mp_digit)1))
7979
#define MP_DIGIT_MAX MP_MASK
8080

81-
/* In case the stack size has to be limited, use a WARRAY from the heap */
82-
#ifdef MP_SMALL_STACK_SIZE
83-
/* Per default we enable the locking mechanism.
84-
* Please disable by defining `MP_NO_LOCKING` if you really know what you do.
85-
*/
86-
#ifndef MP_NO_LOCKING
87-
#define MP_USE_LOCKING
88-
#endif
89-
#endif /* MP_SMALL_STACK_SIZE */
90-
91-
/* The user can define how many WARRAY instances are allocated,
92-
* usually this should equal the number of parallel threads that
93-
* use LTM functionality.
94-
* This has no effect if `MP_SMALL_STACK_SIZE` is not defined.
95-
*/
96-
#ifndef MP_WARRAY_NUM
97-
#define MP_WARRAY_NUM 1
98-
#endif
99-
10081
/* Primality generation flags */
10182
#define MP_PRIME_BBS 0x0001 /* BBS style prime */
10283
#define MP_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */
@@ -607,13 +588,7 @@ mp_err mp_fread(mp_int *a, int radix, FILE *stream) MP_WUR;
607588
mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream) MP_WUR;
608589
#endif
609590

610-
typedef struct {
611-
int (*lock)(void *ctx);
612-
int (*unlock)(void *ctx);
613-
void *ctx;
614-
} mp_lock;
615-
616-
mp_err mp_warray_init(size_t n_alloc, bool preallocate, mp_lock *lock);
591+
mp_err mp_warray_init(size_t n_alloc, bool preallocate);
617592
int mp_warray_free(void);
618593

619594
#define mp_to_binary(M, S, N) mp_to_radix((M), (S), (N), NULL, 2)

tommath_class.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,6 @@
969969

970970
#if defined(MP_WARRAY_INIT_C)
971971
# define S_MP_WARRAY_FREE_C
972-
# define S_MP_ZERO_BUF_C
973972
#endif
974973

975974
#if defined(MP_XOR_C)

tommath_private.h

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ extern void *MP_CALLOC(size_t nmemb, size_t size);
104104
extern void MP_FREE(void *mem, size_t size);
105105
#endif
106106

107+
#ifndef MP_CMPEXCH
108+
#define MP_CMPEXCH(ptr, expected, desired) __atomic_compare_exchange_n(ptr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)
109+
#endif
110+
107111
/* feature detection macro */
108112
#ifdef _MSC_VER
109113
/* Prevent false positive: not enough arguments for function-like macro invocation */
@@ -245,23 +249,12 @@ MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR;
245249
#define MP_CHECK_WARRAY(name)
246250
#endif
247251

248-
#ifdef MP_USE_LOCKING
249-
#define MP_USE_LOCKING_C
250-
#define S_MP_WARRAY_LOCK() do { if (s_mp_warray.locking_enabled) { s_mp_warray.lock.lock(s_mp_warray.lock.ctx); } } while(0)
251-
#define S_MP_WARRAY_UNLOCK() do { if (s_mp_warray.locking_enabled) { s_mp_warray.lock.unlock(s_mp_warray.lock.ctx); } } while(0)
252-
#else
253-
#define S_MP_WARRAY_LOCK()
254-
#define S_MP_WARRAY_UNLOCK()
255-
#endif
256-
257252
struct warray {
258253
void *warray;
259254
};
260255
typedef struct {
261256
struct warray *l_free, *l_used;
262257
size_t allocated, usable;
263-
bool locking_enabled;
264-
mp_lock lock;
265258
} st_warray;
266259

267260
extern MP_PRIVATE st_warray s_mp_warray;

0 commit comments

Comments
 (0)