Skip to content

Commit b37291f

Browse files
committed
Make this moderately less embarrassing
1 parent cafbef1 commit b37291f

9 files changed

+107
-24
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*~
22
*.o
3+
generated/

Makefile

+19-12
Original file line numberDiff line numberDiff line change
@@ -33,40 +33,47 @@ help:
3333

3434
THIS_RUN := results-$(shell date +%Y%m%d-%H%M%S)
3535

36-
CFLAGS ?= -Wall -pedantic -std=gnu11 -O3 -g
36+
# Note that you may have to change -DHAVE_GETRANDOM to -DHAVE_RDRAND
37+
# depending on what kernel and hardware you have.
38+
CFLAGS ?= -Wall -pedantic -std=gnu11 -I. -Igenerated -O3 -g -DHAVE_GETRANDOM
3739
LDFLAGS ?= -lm
3840

3941
all: run-experiment
40-
run-experiment: clean-old-results generate-files run-trials analyze-results
41-
mkdir $(shell hostname)-$(shell date +%y%m%d)
42+
run-experiment: clean generate-files run-trials analyze-results
43+
44+
generated/:
45+
mkdir -p generated/
4246

4347
%.o: %.c
4448
$(CC) $(CFLAGS) -DN_ENTRIES=$(N_ENTRIES) -o $@ -c $^
4549

4650
HEADER_FILES := dispatch.h dummy-fns.h
47-
DRIVER_OBJS := main.o dummy-fns.o ns-$(CALL_DISTRIBUTION).o
48-
$(PLATFORM)-linear: $(PLATFORM)-linear.o $(DRIVER_OBJS)
49-
$(PLATFORM)-binary: $(PLATFORM)-binary.o $(DRIVER_OBJS)
50-
$(PLATFORM)-vtable: $(PLATFORM)-vtable.o $(DRIVER_OBJS)
51-
c-switch: c-switch.o $(DRIVER_OBJS)
52-
c-vtable: c-vtable.o $(DRIVER_OBJS)
51+
DRIVER_OBJS := main.o generated/dummy-fns.o ns-$(CALL_DISTRIBUTION).o xoroshiro128plus.o
52+
$(PLATFORM)-linear: generated/$(PLATFORM)-linear.o $(DRIVER_OBJS)
53+
$(PLATFORM)-binary: generated/$(PLATFORM)-binary.o $(DRIVER_OBJS)
54+
$(PLATFORM)-vtable: generated/$(PLATFORM)-vtable.o $(DRIVER_OBJS)
55+
c-switch: generated/c-switch.o $(DRIVER_OBJS)
56+
c-vtable: generated/c-vtable.o $(DRIVER_OBJS)
5357

5458
%.c: $(HEADER_FILES)
5559

5660
$(TRIALS):
57-
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
61+
$(CC) -static $(CFLAGS) -o $@ $^ $(LDFLAGS)
5862

59-
GENERATED_FILES := dummy-fns.h dummy-fns.c $(PLATFORM)-linear.s $(PLATFORM)-binary.s $(PLATFORM)-vtable.s c-switch.c c-vtable.c
63+
GENERATED_FILES := generated/dummy-fns.h generated/dummy-fns.c \
64+
generated/$(PLATFORM)-linear.s generated/$(PLATFORM)-binary.s \
65+
generated/$(PLATFORM)-vtable.s generated/c-switch.c generated/c-vtable.c
6066

6167
$(GENERATED_FILES): generator
6268

6369
GENERATOR_FLAGS=--fn-work=$(FN_WORK) --fn-alignment=$(FN_ALIGNMENT) --n-entries=$(N_ENTRIES)
64-
generate-files: generator
70+
generate-files: generated/ generator
6571
./generator $(GENERATOR_FLAGS) $(GENERATED_FILES)
6672

6773
run-trials: $(shell shuf -e $(TRIALS))
6874
for i in $^; do \
6975
echo running $$i in $(THIS_RUN); \
76+
perf stat -r $(N_RUNS) ./$$i $(N_DISPATCHES) ||:; \
7077
done
7178

7279
clean:

README.md

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
*Do not use this code!*
3+
4+
This is some benchmarking code I wrote several years ago and came back
5+
to a few times, in support of [a blog post I finally published]. One
6+
of the reasons I delayed finishing that post so long was that I
7+
realized *the methodology used here is flawed*. It's enough for us to
8+
make a (weak) point, but shouldn't be taken seriously.
9+
10+
I'll do something like this again soon, and hopefully that will set a
11+
better example.
12+
13+
There is both a Makefile that will do things, and a Python program
14+
(`run-experiment.py`) that will do similar things. The Makefile is
15+
broken but convenient. Many of the options for programs here don't
16+
actually do anything.
17+
18+
You have been warned.
19+
20+
[a blog post I finally published]: http://www.cipht.net/2017/10/03/are-jump-tables-always-fastest.html

dispatch.h

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
#include <stdint.h>
12

23
extern void dispatch(int state);
34
extern int next_state(void);
45

56
extern int state_trk;
7+
8+
extern uint64_t next(void);

generator

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22

3-
# parse arguments (tunable parameters)
3+
import argparse
4+
import os.path
45

56

67
def emit_dummy_fns(out, options):
@@ -9,7 +10,7 @@ def emit_dummy_fns(out, options):
910
1011
"""
1112
for i in range(options.n):
12-
s += "void fn_{0}(void) {{ state_trk += {0}; }}\n".format(i)
13+
s += "void fn_{0}(void) {{ state_trk += {0}; __builtin_ia32_clflush(dispatch); }}\n".format(i)
1314
out.write(s)
1415

1516
def emit_dummy_fns_h(out, options):
@@ -103,7 +104,7 @@ def emit_binary_amd64(out, options):
103104
def generate_binary_calls(l, i, n, last_comparison):
104105
if n <= 1: return 'jmp fn_{0}'.format(i)
105106
if n == 2: return """{2}
106-
je fn_{0}
107+
je fn_{0}
107108
jmp fn_{1}""".format(i, i+1, '' if last_comparison == i else "cmp ${0}, %edi".format(i))
108109
if n == 3: return """{3}
109110
je fn_{0}
@@ -126,12 +127,11 @@ dispatch:
126127
{}
127128
call abort\n""".format(generate_binary_calls("", 0, options.n, None)))
128129

129-
import argparse
130130

131131
def main():
132132
parser = argparse.ArgumentParser()
133-
# parser.add_argument('--fn-work', choices=['none', 'clflush'],
134-
# help='what kind of work dummy functions do (none, clflush)')
133+
parser.add_argument('--fn-work', choices=['none', 'clflush', 'memcpy'],
134+
help='what kind of work dummy functions do (none, clflush)')
135135
parser.add_argument('--cache-flush', choices=['none', 'I', 'D', 'ID'],
136136
help='whether dummy functions flush cache')
137137
parser.add_argument('--fn-alignment',
@@ -153,7 +153,7 @@ def main():
153153
'x86_64-vtable.s': lambda f: emit_vtable_amd64(f, options)}
154154
for x in options.file:
155155
with open(x, "w") as f:
156-
t[x](f)
156+
t[os.path.basename(x)](f)
157157
exit(0)
158158

159159
if __name__ == '__main__': main()

main.c

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,40 @@
11
/* Common entry point for generated code.
22
*/
33

4+
#include "dispatch.h"
45
#include <string.h>
56
#include <stdlib.h>
6-
#include "dispatch.h"
77

88
int state_trk = 0;
9+
extern uint64_t xorshift_seed[2];
10+
11+
static void seed(void);
12+
13+
#if defined(HAVE_RDRAND)
14+
static void seed(void)
15+
{
16+
asm volatile("0: rdrand %0; jnc 0b" : "=r" (xorshift_seed[0]));
17+
asm volatile("0: rdrand %0; jnc 0b" : "=r" (xorshift_seed[1]));
18+
}
19+
20+
#elif defined(HAVE_GETRANDOM)
21+
#include <linux/random.h>
22+
#include <sys/syscall.h>
23+
24+
static void seed(void)
25+
{
26+
ssize_t rv = syscall(SYS_getrandom, xorshift_seed, sizeof(xorshift_seed), 0);
27+
if (rv != sizeof(xorshift_seed)) abort();
28+
}
29+
#else
30+
#error "Need a way to seed the PRNG"
31+
#endif
932

1033
int main(int argc, char **argv)
1134
{
1235
if (argc < 2) abort();
36+
37+
seed();
1338
size_t count = atoi(argv[1]);
1439
while (--count)
1540
dispatch(next_state());

ns-pareto.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
int next_state(void)
77
{
8-
if (fmod((double)rand(), 100.0d) < 80.0d)
9-
return 42 % N_ENTRIES;
10-
return rand() % N_ENTRIES; /* you should never do this. */
8+
if (next() % 10 < 8)
9+
return 0;
10+
return next() % N_ENTRIES; /* you should never do this. */
1111
}

ns-uniform.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33

44
int next_state(void)
55
{
6-
return rand() % N_ENTRIES; /* you should never do this. */
6+
return next() % N_ENTRIES; /* you should never do this. */
77
}

xoroshiro128plus.c

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org)
2+
3+
To the extent possible under law, the author has dedicated all copyright
4+
and related and neighboring rights to this software to the public domain
5+
worldwide. This software is distributed without any warranty.
6+
7+
See <http://creativecommons.org/publicdomain/zero/1.0/>. */
8+
9+
#include <stdint.h>
10+
11+
uint64_t xorshift_seed[2];
12+
13+
static inline uint64_t rotl(const uint64_t x, int k) {
14+
return (x << k) | (x >> (64 - k));
15+
}
16+
17+
uint64_t next(void) {
18+
const uint64_t s0 = xorshift_seed[0];
19+
uint64_t s1 = xorshift_seed[1];
20+
const uint64_t result = s0 + s1;
21+
22+
s1 ^= s0;
23+
xorshift_seed[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
24+
xorshift_seed[1] = rotl(s1, 36); // c
25+
26+
return result;
27+
}

0 commit comments

Comments
 (0)