From b53733e2faa6426caf5cfed56bcac31e3d1cd2c1 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 3 Jun 2022 12:48:37 -0400 Subject: [PATCH 1/6] Add support for profiling benchmarks --- doc/cli.rst | 2 ++ pyperf/_manager.py | 3 +++ pyperf/_process_time.py | 42 ++++++++++++++++++++++++++++--- pyperf/_runner.py | 55 ++++++++++++++++++++++++++++++++++++++--- pyperf/_utils.py | 21 ++++++++++++++++ 5 files changed, 116 insertions(+), 7 deletions(-) diff --git a/doc/cli.rst b/doc/cli.rst index 7c4229e5..3cac1f4a 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -491,6 +491,7 @@ Usage [--duplicate DUPLICATE] [-s SETUP] [--teardown TEARDOWN] + [--profile PROFILE] stmt [stmt ...] Options: @@ -512,6 +513,7 @@ Options: * ``--duplicate=DUPLICATE``: Duplicate statements (``stmt`` statements, not ``SETUP``) to reduce the overhead of the outer loop and multiply inner loops by DUPLICATE (see ``--inner-loops`` option). +* ``--profile=PROFILE``: Run the benchmark inside the cProfile profiler and output to the given file. This is a convenient way to profile a specific benchmark, but it will make the actual benchmark timings much less accurate. .. note:: timeit ``-n`` (number) and ``-r`` (repeat) options become ``-l`` (loops) and diff --git a/pyperf/_manager.py b/pyperf/_manager.py index f29af1fa..d478708d 100644 --- a/pyperf/_manager.py +++ b/pyperf/_manager.py @@ -66,6 +66,9 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe): if args.track_memory: cmd.append('--track-memory') + if args.profile: + cmd.extend(['--profile', args.profile]) + if self.runner._add_cmdline_args: self.runner._add_cmdline_args(cmd, args) diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py index d3589b28..b184e210 100644 --- a/pyperf/_process_time.py +++ b/pyperf/_process_time.py @@ -17,6 +17,7 @@ import os import subprocess import sys +import tempfile import time try: @@ -35,11 +36,31 @@ def get_max_rss(): return 0 -def bench_process(loops, args, kw): +def merge_profile_stats_files(src, dst): + """ + Merging one existing pstats file into another. + """ + import pstats + if os.path.isfile(dst): + src_stats = pstats.Stats(src) + dst_stats = pstats.Stats(dst) + dst_stats.add(src_stats) + dst_stats.dump_stats(dst) + os.unlink(src) + else: + os.rename(src, dst) + + +def bench_process(loops, args, kw, profile_filename=None): max_rss = 0 range_it = range(loops) start_time = time.perf_counter() + if profile_filename: + with tempfile.NamedTemporaryFile(suffix=".profile", delete=False) as fh: + temp_profile_filename = fh.name + args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:] + for loop in range_it: start_rss = get_max_rss() @@ -51,6 +72,8 @@ def bench_process(loops, args, kw): if exitcode != 0: print("Command failed with exit code %s" % exitcode, file=sys.stderr) + if profile_filename: + os.unlink(temp_profile_filename) sys.exit(exitcode) proc = None @@ -58,6 +81,11 @@ def bench_process(loops, args, kw): rss = get_max_rss() - start_rss max_rss = max(max_rss, rss) + if profile_filename: + merge_profile_stats_files( + temp_profile_filename, profile_filename + ) + dt = time.perf_counter() - start_time return (dt, max_rss) @@ -70,10 +98,18 @@ def main(): sys.exit(1) if len(sys.argv) < 3: - print("Usage: %s %s loops program [arg1 arg2 ...]" + print("Usage: %s %s loops program [arg1 arg2 ...] [--profile profile]" % (os.path.basename(sys.executable), __file__)) sys.exit(1) + profile_idx = sys.argv.index("--profile") + if profile_idx != -1: + profile_filename = sys.argv[profile_idx + 1] + del sys.argv[profile_idx] + del sys.argv[profile_idx] + else: + profile_filename = None + loops = int(sys.argv[1]) args = sys.argv[2:] @@ -88,7 +124,7 @@ def main(): kw['stdout'] = devnull kw['stderr'] = subprocess.STDOUT - dt, max_rss = bench_process(loops, args, kw) + dt, max_rss = bench_process(loops, args, kw, profile_filename) if devnull is not None: devnull.close() diff --git a/pyperf/_runner.py b/pyperf/_runner.py index b70e3e52..6c0d9e2b 100644 --- a/pyperf/_runner.py +++ b/pyperf/_runner.py @@ -12,7 +12,8 @@ set_highest_priority) from pyperf._formatter import format_timedelta from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable, - WritePipe, get_python_names) + WritePipe, get_python_names, + merge_profile_stats) from pyperf._worker import WorkerProcessTask @@ -48,6 +49,19 @@ def parse_python_names(names): return parts +def profiling_wrapper(func): + """ + Wrap a function to collect profiling. + """ + import cProfile + profiler = cProfile.Profile() + + def profiling_func(*args): + return profiler.runcall(func, *args) + + return profiler, profiling_func + + class CLIError(Exception): pass @@ -222,6 +236,11 @@ def __init__(self, values=None, warmups=None, processes=None, 'PYTHON as CHANGED_NAME ' 'and REF_PYTHON as REF_NAME in results') + parser.add_argument('--profile', + type=str, + help='Collect profile data using cProfile ' + 'and output to the given file.') + memory = parser.add_mutually_exclusive_group() memory.add_argument('--tracemalloc', action="store_true", help='Trace memory allocations using tracemalloc') @@ -454,12 +473,21 @@ def bench_time_func(self, name, time_func, *args, **kwargs): if not self._check_worker_task(): return None + if self.args.profile: + profiler, time_func = profiling_wrapper(time_func) + def task_func(task, loops): return time_func(loops, *args) task = WorkerProcessTask(self, name, task_func, metadata) + task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def bench_func(self, name, func, *args, **kwargs): """"Benchmark func(*args).""" @@ -474,6 +502,9 @@ def bench_func(self, name, func, *args, **kwargs): if args: func = functools.partial(func, *args) + if self.args.profile: + profiler, func = profiling_wrapper(func) + def task_func(task, loops): # use fast local variables local_timer = time.perf_counter @@ -494,7 +525,12 @@ def task_func(task, loops): task = WorkerProcessTask(self, name, task_func, metadata) task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def bench_async_func(self, name, func, *args, **kwargs): """Benchmark await func(*args)""" @@ -509,6 +545,9 @@ def bench_async_func(self, name, func, *args, **kwargs): if args: func = functools.partial(func, *args) + if self.args.profile: + profiler, func = profiling_wrapper(func) + def task_func(task, loops): if loops != 1: async def main(): @@ -549,7 +588,12 @@ async def main(): task = WorkerProcessTask(self, name, task_func, metadata) task.inner_loops = inner_loops - return self._main(task) + result = self._main(task) + + if self.args.profile: + merge_profile_stats(profiler, self.args.profile) + + return result def timeit(self, name, stmt=None, setup="pass", teardown="pass", inner_loops=None, duplicate=None, metadata=None, globals=None): @@ -671,6 +715,9 @@ def bench_command(self, name, command): if not self._check_worker_task(): return None + if self.args.profile: + command.extend(["--profile", self.args.profile]) + # Use lazy import to limit imports on 'import pyperf' from pyperf._command import BenchCommandTask task = BenchCommandTask(self, name, command) diff --git a/pyperf/_utils.py b/pyperf/_utils.py index 05dc54a6..99225466 100644 --- a/pyperf/_utils.py +++ b/pyperf/_utils.py @@ -405,3 +405,24 @@ def geometric_mean(data): if not data: raise ValueError("empty data") return _geometric_mean(data) + + +def merge_profile_stats(profiler, dst): + """ + Save pstats by merging into an existing file. + """ + import pstats + if os.path.isfile(dst): + try: + src_stats = pstats.Stats(profiler) + except TypeError: + # If no actual stats were collected, a TypeError is raised + # and we don't need to merge anything into the output. + return + + dst_stats = pstats.Stats(dst) + dst_stats.add(src_stats) + dst_stats.dump_stats(dst) + else: + profiler.dump_stats(dst) + From d566d5277499a8ccd8f1654fafe6a32607b2dda7 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 6 Jun 2022 09:20:08 -0400 Subject: [PATCH 2/6] Fix unit tests --- pyperf/_process_time.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py index b184e210..4d010500 100644 --- a/pyperf/_process_time.py +++ b/pyperf/_process_time.py @@ -102,8 +102,8 @@ def main(): % (os.path.basename(sys.executable), __file__)) sys.exit(1) - profile_idx = sys.argv.index("--profile") - if profile_idx != -1: + if "--profile" in sys.argv: + profile_idx = sys.argv.index("--profile") profile_filename = sys.argv[profile_idx + 1] del sys.argv[profile_idx] del sys.argv[profile_idx] From a960e67066ad22f7ec5eb032022fd5caa01da68f Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 13 Jun 2022 16:19:24 -0400 Subject: [PATCH 3/6] Add unit tests --- pyperf/tests/test_runner.py | 44 ++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py index a61053ba..b3501a82 100644 --- a/pyperf/tests/test_runner.py +++ b/pyperf/tests/test_runner.py @@ -1,5 +1,6 @@ import collections import os.path +import pstats import sys import tempfile import textwrap @@ -73,7 +74,7 @@ class ClockInfo: self.assertEqual(bench.get_nrun(), 1) return Result(runner, bench, stdout) - + def test_worker(self): result = self.exec_runner('--worker', '-l1', '-w1') self.assertRegex(result.stdout, @@ -83,6 +84,47 @@ def test_debug_single_value(self): result = self.exec_runner('--debug-single-value', '--worker') self.assertEqual(result.bench.get_nvalue(), 1) + def test_profile_time_func(self): + with tempfile.NamedTemporaryFile('wb+') as tmp: + name = tmp.name + args = ['--worker', '-l1', '-w1', '--profile', name] + runner = self.create_runner(args) + + def time_func(loops): + return 1.0 + + runner.bench_time_func('bench1', time_func) + + try: + s = pstats.Stats(name) + assert len(s.get_stats_profile().func_profiles) + finally: + if os.path.isfile(name): + os.unlink(name) + + def test_profile_func(self): + with tempfile.NamedTemporaryFile('wb+') as tmp: + name = tmp.name + args = ['--worker', '-l1', '-w1', '--profile', name] + runner = self.create_runner(args) + + def external(): + return [1] * 1000 + + def func(): + external() + return 1.0 + + runner.bench_func('bench1', func) + + try: + import pstats + s = pstats.Stats(name) + assert len(s.get_stats_profile().func_profiles) + finally: + if os.path.isfile(name): + os.unlink(name) + def test_pipe(self): rpipe, wpipe = create_pipe() with rpipe: From e2890f9c28c1f78dd01b717a3493141d3a0d52b9 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Jun 2022 15:28:34 -0400 Subject: [PATCH 4/6] Use mktemp instead --- pyperf/_process_time.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyperf/_process_time.py b/pyperf/_process_time.py index 4d010500..7010195d 100644 --- a/pyperf/_process_time.py +++ b/pyperf/_process_time.py @@ -57,8 +57,7 @@ def bench_process(loops, args, kw, profile_filename=None): start_time = time.perf_counter() if profile_filename: - with tempfile.NamedTemporaryFile(suffix=".profile", delete=False) as fh: - temp_profile_filename = fh.name + temp_profile_filename = tempfile.mktemp() args = [args[0], "-m", "cProfile", "-o", temp_profile_filename] + args[1:] for loop in range_it: From 14a6e19861e1d45048b717a1857f9374f42d2769 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 17 Jun 2022 11:34:10 -0400 Subject: [PATCH 5/6] Fix tests on < py3.9 --- pyperf/tests/test_runner.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py index b3501a82..ffa1fbc1 100644 --- a/pyperf/tests/test_runner.py +++ b/pyperf/tests/test_runner.py @@ -97,7 +97,10 @@ def time_func(loops): try: s = pstats.Stats(name) - assert len(s.get_stats_profile().func_profiles) + if sys.version_info < (3, 9): + assert len(s.stats) + else: + assert len(s.get_stats_profile().func_profiles) finally: if os.path.isfile(name): os.unlink(name) @@ -120,7 +123,10 @@ def func(): try: import pstats s = pstats.Stats(name) - assert len(s.get_stats_profile().func_profiles) + if sys.version_info < (3, 9): + assert len(s.stats) + else: + assert len(s.get_stats_profile().func_profiles) finally: if os.path.isfile(name): os.unlink(name) From f27c2e965a97775441e914482fb78b4f0430523c Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 14 Jul 2022 08:18:51 -0400 Subject: [PATCH 6/6] Update pyperf/tests/test_runner.py Co-authored-by: Dong-hee Na --- pyperf/tests/test_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyperf/tests/test_runner.py b/pyperf/tests/test_runner.py index ffa1fbc1..c456a9ae 100644 --- a/pyperf/tests/test_runner.py +++ b/pyperf/tests/test_runner.py @@ -74,7 +74,6 @@ class ClockInfo: self.assertEqual(bench.get_nrun(), 1) return Result(runner, bench, stdout) - def test_worker(self): result = self.exec_runner('--worker', '-l1', '-w1') self.assertRegex(result.stdout,